# HG changeset patch
# User rmarenco
# Date 1469095131 14400
# Node ID acc233161f507fc697d749279d6a1b249bc25c53
# Parent 4f9847539a28d8bc7269536292b15a3ae00bbab8
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 1b1063f90004764bcf504f4340738eca5c4b1f9d
diff -r 4f9847539a28 -r acc233161f50 Bam.py
--- a/Bam.py Wed Jul 20 12:29:08 2016 -0400
+++ b/Bam.py Thu Jul 21 05:58:51 2016 -0400
@@ -15,12 +15,8 @@
class Bam( Datatype ):
- def __init__( self, input_bam_false_path, data_bam ,
- inputFastaFile, extra_files_path, tool_directory ):
- super(Bam, self).__init__( input_fasta_file=inputFastaFile,
- extra_files_path=extra_files_path,
- tool_directory=tool_directory,
- )
+ def __init__(self, input_bam_false_path, data_bam):
+ super(Bam, self).__init__()
self.track = None
diff -r 4f9847539a28 -r acc233161f50 Bam.pyc
Binary file Bam.pyc has changed
diff -r 4f9847539a28 -r acc233161f50 Bed.py
--- a/Bed.py Wed Jul 20 12:29:08 2016 -0400
+++ b/Bed.py Thu Jul 21 05:58:51 2016 -0400
@@ -11,19 +11,14 @@
class Bed( Datatype ):
- def __init__( self, inputBedGeneric, data_bed_generic,
- inputFastaFile, extra_files_path, tool_directory ):
- super(Bed, self).__init__(
- inputFastaFile, extra_files_path, tool_directory
- )
+ def __init__( self, inputBedGeneric, data_bed_generic):
+ super(Bed, self).__init__()
self.track = None
self.inputBedGeneric = inputBedGeneric
self.sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed")
- self.chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes")
- self.twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0)
self.data_bed_generic = data_bed_generic
self.name_bed_generic = self.data_bed_generic["name"]
@@ -32,15 +27,6 @@
# Sort processing
subtools.sort(self.inputBedGeneric, self.sortedBedFile.name)
- # Generate the chrom.sizes
- # TODO: Isolate in a function
- # We first get the twoBit Infos
- subtools.twoBitInfo(self.twoBitFile.name, self.twoBitInfoFile.name)
-
- # Then we get the output to inject into the sort
- # TODO: Check if no errors
- subtools.sortChromSizes(self.twoBitInfoFile.name, self.chromSizesFile.name)
-
# bedToBigBed processing
# TODO: Change the name of the bb, to tool + genome + possible adding if multiple + .bb
trackName = "".join( ( self.name_bed_generic, ".bb") )
diff -r 4f9847539a28 -r acc233161f50 Bed.pyc
Binary file Bed.pyc has changed
diff -r 4f9847539a28 -r acc233161f50 BedSimpleRepeats.py
--- a/BedSimpleRepeats.py Wed Jul 20 12:29:08 2016 -0400
+++ b/BedSimpleRepeats.py Thu Jul 21 05:58:51 2016 -0400
@@ -10,40 +10,26 @@
class BedSimpleRepeats( Datatype ):
- def __init__( self, input_bed_simple_repeats_false_path, data_bed_simple_repeats,
- input_fasta_file, extra_files_path, tool_directory ):
+ def __init__(self, input_bed_simple_repeats_false_path, data_bed_simple_repeats):
- super(BedSimpleRepeats, self).__init__(
- input_fasta_file, extra_files_path, tool_directory
- )
+ super(BedSimpleRepeats, self).__init__()
self.input_bed_simple_repeats_false_path = input_bed_simple_repeats_false_path
self.name_bed_simple_repeats = data_bed_simple_repeats["name"]
self.priority = data_bed_simple_repeats["order_index"]
sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed")
- twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0)
- chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes")
# Sort processing
subtools.sort(self.input_bed_simple_repeats_false_path, sortedBedFile.name)
- # TODO: Regroup in an mother class which handles the Chrom.sizes creation with Gff3 and Gtf
- # Generate the chrom.sizes
-
- subtools.twoBitInfo(self.twoBitFile.name, twoBitInfoFile.name)
-
- # Then we get the output to inject into the sort
- # TODO: Check if no errors
- subtools.sortChromSizes(twoBitInfoFile.name, chromSizesFile.name)
-
# bedToBigBed processing
# TODO: Change the name of the bb, to tool + genome + .bb
trackName = "".join( ( self.name_bed_simple_repeats, '.bb' ) )
myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName)
auto_sql_option = "%s%s" % ('-as=', os.path.join(self.tool_directory, 'trf_simpleRepeat.as'))
with open(myBigBedFilePath, 'w') as bigBedFile:
- subtools.bedToBigBed(sortedBedFile.name, chromSizesFile.name, bigBedFile.name,
+ subtools.bedToBigBed(sortedBedFile.name, self.chromSizesFile.name, bigBedFile.name,
typeOption='-type=bed4+12',
autoSql=auto_sql_option)
diff -r 4f9847539a28 -r acc233161f50 BedSimpleRepeats.pyc
Binary file BedSimpleRepeats.pyc has changed
diff -r 4f9847539a28 -r acc233161f50 BigWig.py
--- a/BigWig.py Wed Jul 20 12:29:08 2016 -0400
+++ b/BigWig.py Thu Jul 21 05:58:51 2016 -0400
@@ -10,11 +10,8 @@
class BigWig( Datatype ):
- def __init__(self, input_bigwig_path, data_bigwig,
- input_fasta_path, extra_files_path, tool_directory):
- super(BigWig, self).__init__(
- input_fasta_path, extra_files_path, tool_directory
- )
+ def __init__(self, input_bigwig_path, data_bigwig):
+ super(BigWig, self).__init__()
self.track = None
diff -r 4f9847539a28 -r acc233161f50 BigWig.pyc
Binary file BigWig.pyc has changed
diff -r 4f9847539a28 -r acc233161f50 Datatype.py
--- a/Datatype.py Wed Jul 20 12:29:08 2016 -0400
+++ b/Datatype.py Thu Jul 21 05:58:51 2016 -0400
@@ -6,6 +6,7 @@
"""
import os
+import tempfile
from util import subtools
@@ -14,32 +15,54 @@
twoBitFile = None
- def __init__( self, input_fasta_file, extra_files_path, tool_directory ):
+ input_fasta_file = None
+ extra_files_path = None
+ tool_directory = None
- self.input_fasta_file = input_fasta_file
- self.extra_files_path = extra_files_path
- self.tool_directory = tool_directory
+ mySpecieFolderPath = None
+ myTrackFolderPath = None
+
+ twoBitFile = None
+ chromSizesFile = None
- self.twoBitFile = None
+ def __init__(self):
- # Construction of the arborescence
- # TODO: Change the hard-coded path with a input based one
- self.mySpecieFolderPath = os.path.join(extra_files_path, "myHub", "dbia3")
+ not_init_message = "The {0} is not initialized." \
+ "Did you use pre_init static method first?"
+ if Datatype.input_fasta_file is None:
+ raise TypeError(not_init_message.format('reference genome'))
+ if Datatype.extra_files_path is None:
+ raise TypeError(not_init_message.format('track Hub path'))
+ if Datatype.tool_directory is None:
+ raise TypeError(not_init_message.format('tool directory'))
+
- # TODO: Refactor the name of the folder "tracks" into one variable, and should be inside TrackHub object
- self.myTrackFolderPath = os.path.join(self.mySpecieFolderPath, "tracks")
+ @staticmethod
+ def pre_init(reference_genome, two_bit_path, chrom_sizes_file,
+ extra_files_path, tool_directory, specie_folder, tracks_folder):
+ Datatype.extra_files_path = extra_files_path
+ Datatype.tool_directory = tool_directory
- # TODO: Redundant, should be refactored because they are all doing it...into hubArchiveCreator?
+ # TODO: All this should be in TrackHub and not in Datatype
+ Datatype.mySpecieFolderPath = specie_folder
+ Datatype.myTrackFolderPath = tracks_folder
+
+ Datatype.input_fasta_file = reference_genome
+
# 2bit file creation from input fasta
- if not Datatype.twoBitFile:
- print "We create the self.twoBit in " + self.__class__.__name__
- Datatype.twoBitFile = subtools.faToTwoBit(self.input_fasta_file, self.mySpecieFolderPath)
+ Datatype.twoBitFile = two_bit_path
+ Datatype.chromSizesFile = chrom_sizes_file
- # TODO: Remove this by saying to all children classes to use "Datatype.twoBitFile" instead
- self.twoBitFile = Datatype.twoBitFile
+ @staticmethod
+ def get_largest_scaffold_name(self):
+ # We can get the biggest scaffold here, with chromSizesFile
+ with open(Datatype.chromSizesFile.name, 'r') as chrom_sizes:
+ # TODO: Check if exists
+ return chrom_sizes.readline().split()[0]
+ # TODO: Rename for PEP8
def getShortName( self, name_to_shortify ):
# Slice to get from Long label the short label
short_label_slice = slice(0, 15)
- return name_to_shortify[short_label_slice]
\ No newline at end of file
+ return name_to_shortify[short_label_slice]
diff -r 4f9847539a28 -r acc233161f50 Datatype.pyc
Binary file Datatype.pyc has changed
diff -r 4f9847539a28 -r acc233161f50 Gff3.py
--- a/Gff3.py Wed Jul 20 12:29:08 2016 -0400
+++ b/Gff3.py Thu Jul 21 05:58:51 2016 -0400
@@ -11,11 +11,8 @@
class Gff3( Datatype ):
- def __init__( self, input_Gff3_false_path, data_gff3,
- input_fasta_false_path, extra_files_path, tool_directory ):
- super( Gff3, self ).__init__(
- input_fasta_false_path, extra_files_path, tool_directory
- )
+ def __init__(self, input_Gff3_false_path, data_gff3):
+ super( Gff3, self ).__init__()
self.track = None
@@ -29,8 +26,6 @@
sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed")
# TODO: Refactor into another Class to manage the twoBitInfo and ChromSizes (same process as in Gtf.py)
- twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0)
- chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes")
# gff3ToGenePred processing
subtools.gff3ToGenePred(self.input_Gff3_false_path, genePredFile.name)
@@ -42,19 +37,14 @@
# Sort processing
subtools.sort(unsortedBedFile.name, sortedBedFile.name)
- # Generate the twoBitInfo
- subtools.twoBitInfo(self.twoBitFile.name, twoBitInfoFile.name)
-
- # Then we get the output to generate the chromSizes
# TODO: Check if no errors
- subtools.sortChromSizes(twoBitInfoFile.name, chromSizesFile.name)
# bedToBigBed processing
# TODO: Change the name of the bb, to tool + genome + possible adding if multiple + .bb
trackName = "".join( (self.name_gff3, ".bb" ) )
myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName)
with open(myBigBedFilePath, 'w') as bigBedFile:
- subtools.bedToBigBed(sortedBedFile.name, chromSizesFile.name, bigBedFile.name)
+ subtools.bedToBigBed(sortedBedFile.name, self.chromSizesFile.name, bigBedFile.name)
# Create the Track Object
dataURL = "tracks/%s" % trackName
diff -r 4f9847539a28 -r acc233161f50 Gff3.pyc
Binary file Gff3.pyc has changed
diff -r 4f9847539a28 -r acc233161f50 Gtf.py
--- a/Gtf.py Wed Jul 20 12:29:08 2016 -0400
+++ b/Gtf.py Thu Jul 21 05:58:51 2016 -0400
@@ -11,11 +11,9 @@
class Gtf( Datatype ):
- def __init__( self, input_gtf_false_path, data_gtf,
- input_fasta_file, extra_files_path, tool_directory ):
- super(Gtf, self).__init__( input_fasta_file=input_fasta_file,
- extra_files_path=extra_files_path,
- tool_directory=tool_directory )
+ def __init__( self, input_gtf_false_path, data_gtf):
+
+ super(Gtf, self).__init__()
self.track = None
@@ -30,9 +28,6 @@
unsortedBedFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".unsortedBed")
sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed")
- twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0)
- chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes")
-
# GtfToGenePred
subtools.gtfToGenePred(self.input_gtf_false_path, genePredFile.name)
@@ -43,20 +38,12 @@
# Sort processing
subtools.sort(unsortedBedFile.name, sortedBedFile.name)
- # TODO: Chehck if the twoBitInfo / ChromSizes is redundant and make an intermediate class
- # Generate the twoBitInfo
- subtools.twoBitInfo(self.twoBitFile.name, twoBitInfoFile.name)
-
- # Then we get the output to generate the chromSizes
- # TODO: Check if no errors
- subtools.sortChromSizes(twoBitInfoFile.name, chromSizesFile.name)
-
# bedToBigBed processing
# TODO: Change the name of the bb, to tool + genome + possible adding if multiple + .bb
trackName = "".join( ( self.name_gtf, ".bb") )
myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName)
with open(myBigBedFilePath, 'w') as bigBedFile:
- subtools.bedToBigBed(sortedBedFile.name, chromSizesFile.name, bigBedFile.name)
+ subtools.bedToBigBed(sortedBedFile.name, self.chromSizesFile.name, bigBedFile.name)
# Create the Track Object
dataURL = "tracks/%s" % trackName
diff -r 4f9847539a28 -r acc233161f50 Gtf.pyc
Binary file Gtf.pyc has changed
diff -r 4f9847539a28 -r acc233161f50 TrackHub.py
--- a/TrackHub.py Wed Jul 20 12:29:08 2016 -0400
+++ b/TrackHub.py Thu Jul 21 05:58:51 2016 -0400
@@ -2,21 +2,35 @@
# -*- coding: utf8 -*-
import os
+import tempfile
+import shutil
import zipfile
+# Internal dependencies
+from Datatype import Datatype
+from util import subtools
+
from mako.lookup import TemplateLookup
class TrackHub(object):
"""docstring for TrackHub"""
- def __init__(self, inputFastaFile, outputFile, extra_files_path, tool_directory):
+ def __init__(self, inputFastaFile, user_email, outputFile, extra_files_path, tool_directory):
super(TrackHub, self).__init__()
self.rootAssemblyHub = None
+
self.mySpecieFolderPath = None
+ self.myTracksFolderPath = None
self.tool_directory = tool_directory
+ self.reference_genome = inputFastaFile
+ # TODO: Add the specie name
+ self.genome_name = inputFastaFile.assembly_id
+ self.default_pos = None
+ self.user_email = user_email
+
# TODO: Modify according to the files passed in parameter
mylookup = TemplateLookup(directories=[os.path.join(tool_directory, 'templates/trackDb')],
output_encoding='utf-8', encoding_errors='replace')
@@ -25,17 +39,21 @@
self.extra_files_path = extra_files_path
self.outputFile = outputFile
- inputFastaFile = open(inputFastaFile, 'r')
- #self.outputZip = zipfile.ZipFile(os.path.join(extra_files_path, 'myHub.zip'), 'w', allowZip64=True)
-
# Create the structure of the Assembly Hub
# TODO: Merge the following processing into a function as it is also used in twoBitCreator
- baseNameFasta = os.path.basename(inputFastaFile.name)
- suffixTwoBit, extensionTwoBit = os.path.splitext(baseNameFasta)
- self.twoBitName = suffixTwoBit + '.2bit'
+ self.twoBitName = None
+ self.two_bit_final_path = None
+ self.chromSizesFile = None
+
+ self.default_pos = None
- self.rootAssemblyHub = self.__createAssemblyHub__(toolDirectory=tool_directory,
- extra_files_path=extra_files_path)
+ # Set all the missing variables of this class, and physically create the folders/files
+ self.rootAssemblyHub = self.__createAssemblyHub__(extra_files_path=extra_files_path)
+
+ # Init the Datatype
+ Datatype.pre_init(self.reference_genome, self.two_bit_final_path, self.chromSizesFile,
+ self.extra_files_path, self.tool_directory,
+ self.mySpecieFolderPath, self.myTracksFolderPath)
def createZip(self):
for root, dirs, files in os.walk(self.rootAssemblyHub):
@@ -60,124 +78,149 @@
def terminate(self):
# Just a test to output a simple HTML
+ # TODO: Create a class to handle the file object
+ mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates')],
+ output_encoding='utf-8', encoding_errors='replace')
+
+ mytemplate = mylookup.get_template('display.txt')
with open(self.outputFile, 'w') as htmlOutput:
- htmlOutput.write('')
- htmlOutput.write('
')
- htmlOutput.write('')
- htmlOutput.write('The following has been generated by Hub Archive Creator:')
- htmlOutput.write('
')
- htmlOutput.write('')
+ # TODO: We are basically looping two times: One time with os.walk, Second time
+ # with the template. We could improve that if the number of files begins to be really important
+ list_relative_file_path = [ ]
for root, dirs, files in os.walk(self.extra_files_path):
for file in files:
- relDir = os.path.relpath(root, self.extra_files_path)
- htmlOutput.write(str.format('- {1}
', os.path.join(relDir, file),
- os.path.join(relDir, file)))
- htmlOutput.write('')
- htmlOutput.write('')
- htmlOutput.write('')
+ relative_directory = os.path.relpath(root, self.extra_files_path)
+ relative_file_path = os.path.join(relative_directory, file)
+ list_relative_file_path.append(relative_file_path)
+
+ htmlMakoRendered = mytemplate.render(
+ list_relative_file_path=list_relative_file_path
+ )
+ htmlOutput.write(htmlMakoRendered)
+
+ def __createAssemblyHub__(self, extra_files_path):
+ # Get all the necessary information first
+ # 2bit file creation from input fasta
- def __createAssemblyHub__(self, toolDirectory, extra_files_path):
+ # baseNameFasta = os.path.basename(fasta_file_name)
+ # suffixTwoBit, extensionTwoBit = os.path.splitext(baseNameFasta)
+ # nameTwoBit = suffixTwoBit + '.2bit'
+ twoBitFile = tempfile.NamedTemporaryFile(bufsize=0)
+ subtools.faToTwoBit(self.reference_genome.false_path, twoBitFile.name)
+
+ # Generate the twoBitInfo
+ twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0)
+ subtools.twoBitInfo(twoBitFile.name, twoBitInfoFile.name)
+
+ # Then we get the output to generate the chromSizes
+ self.chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes")
+ subtools.sortChromSizes(twoBitInfoFile.name, self.chromSizesFile.name)
+
+ # We can get the biggest scaffold here, with chromSizesFile
+ with open(self.chromSizesFile.name, 'r') as chrom_sizes:
+ # TODO: Check if exists
+ self.default_pos = chrom_sizes.readline().split()[0]
+
# TODO: Manage to put every fill Function in a file dedicated for reading reasons
# Create the root directory
myHubPath = os.path.join(extra_files_path, "myHub")
if not os.path.exists(myHubPath):
os.makedirs(myHubPath)
+ # Create the specie folder
+ # TODO: Generate the name depending on the specie
+ mySpecieFolderPath = os.path.join(myHubPath, self.genome_name)
+ if not os.path.exists(mySpecieFolderPath):
+ os.makedirs(mySpecieFolderPath)
+ self.mySpecieFolderPath = mySpecieFolderPath
+
+ # Copy the 2bit file into the specie folder we have just created
+ self.twoBitName = self.genome_name + ".2bit"
+ self.two_bit_final_path = os.path.join(self.mySpecieFolderPath, self.twoBitName)
+ shutil.copyfile(twoBitFile.name, self.two_bit_final_path)
+
# Add the genomes.txt file
genomesTxtFilePath = os.path.join(myHubPath, 'genomes.txt')
- self.__fillGenomesTxt__(genomesTxtFilePath, toolDirectory)
+ self.__fillGenomesTxt__(genomesTxtFilePath)
# Add the hub.txt file
hubTxtFilePath = os.path.join(myHubPath, 'hub.txt')
- self.__fillHubTxt__(hubTxtFilePath, toolDirectory)
+ self.__fillHubTxt__(hubTxtFilePath)
# Add the hub.html file
# TODO: Change the name and get it depending on the specie
hubHtmlFilePath = os.path.join(myHubPath, 'dbia.html')
- self.__fillHubHtmlFile__(hubHtmlFilePath, toolDirectory)
+ self.__fillHubHtmlFile__(hubHtmlFilePath)
- # Create the specie folder
- # TODO: Generate the name depending on the specie
- mySpecieFolderPath = os.path.join(myHubPath, "dbia3")
- if not os.path.exists(mySpecieFolderPath):
- os.makedirs(mySpecieFolderPath)
- self.mySpecieFolderPath = mySpecieFolderPath
# Create the description html file in the specie folder
descriptionHtmlFilePath = os.path.join(mySpecieFolderPath, 'description.html')
- self.__fillDescriptionHtmlFile__(descriptionHtmlFilePath, toolDirectory)
+ self.__fillDescriptionHtmlFile__(descriptionHtmlFilePath)
# Create the file groups.txt
# TODO: If not inputs for this, do no create the file
groupsTxtFilePath = os.path.join(mySpecieFolderPath, 'groups.txt')
- self.__fillGroupsTxtFile__(groupsTxtFilePath, toolDirectory)
+ self.__fillGroupsTxtFile__(groupsTxtFilePath)
# Create the folder tracks into the specie folder
tracksFolderPath = os.path.join(mySpecieFolderPath, "tracks")
if not os.path.exists(tracksFolderPath):
os.makedirs(tracksFolderPath)
+ self.myTracksFolderPath = tracksFolderPath
return myHubPath
- def __fillGenomesTxt__(self, genomesTxtFilePath, toolDirectory):
+ def __fillGenomesTxt__(self, genomesTxtFilePath):
# TODO: Think about the inputs and outputs
# TODO: Manage the template of this file
# renderer = pystache.Renderer(search_dirs="templates/genomesAssembly")
- pathTemplate = os.path.join(toolDirectory, 'templates/genomesAssembly')
+ pathTemplate = os.path.join(self.tool_directory, 'templates/genomesAssembly')
mylookup = TemplateLookup(directories=[pathTemplate], output_encoding='utf-8', encoding_errors='replace')
mytemplate = mylookup.get_template("layout.txt")
with open(genomesTxtFilePath, 'w') as genomesTxtFile:
# Write the content of the file genomes.txt
- twoBitPath = os.path.join('dbia3/', self.twoBitName)
+ twoBitPath = os.path.join(self.genome_name, self.twoBitName)
htmlMakoRendered = mytemplate.render(
- genomeName="dbia3",
- trackDbPath="dbia3/trackDb.txt",
- groupsPath="dbia3/groups.txt",
- genomeDescription="March 2013 Drosophilia biarmipes unplaced genomic scaffold",
+ genomeName=self.genome_name,
+ trackDbPath=os.path.join(self.genome_name, "trackDb.txt"),
+ groupsPath=os.path.join(self.genome_name, "groups.txt"),
+ genomeDescription=self.genome_name,
twoBitPath=twoBitPath,
- organismName="Drosophilia biarmipes",
- defaultPosition="contig1",
+ organismName=self.genome_name,
+ defaultPosition=self.default_pos,
orderKey="4500",
- scientificName="Drosophilia biarmipes",
- pathAssemblyHtmlDescription="dbia3/description.html"
+ scientificName=self.genome_name,
+ pathAssemblyHtmlDescription=os.path.join(self.genome_name, "description.html")
)
genomesTxtFile.write(htmlMakoRendered)
- def __fillHubTxt__(self, hubTxtFilePath, toolDirectory):
+ def __fillHubTxt__(self, hubTxtFilePath):
# TODO: Think about the inputs and outputs
# TODO: Manage the template of this file
- mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/hubTxt')],
+ mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/hubTxt')],
output_encoding='utf-8', encoding_errors='replace')
mytemplate = mylookup.get_template('layout.txt')
with open(hubTxtFilePath, 'w') as genomesTxtFile:
# Write the content of the file genomes.txt
htmlMakoRendered = mytemplate.render(
- hubName='dbiaOnly',
- shortLabel='dbia',
- longLabel='This hub only contains dbia with the gene predictions',
+ hubName=(''.join(['gonramp', self.genome_name.title()])),
+ shortLabel=self.genome_name,
+ longLabel=self.genome_name,
genomesFile='genomes.txt',
- email='rmarenco@gwu.edu',
+ email=self.user_email,
descriptionUrl='dbia.html'
)
genomesTxtFile.write(htmlMakoRendered)
- def __fillHubHtmlFile__(self, hubHtmlFilePath, toolDirectory):
+ def __fillHubHtmlFile__(self, hubHtmlFilePath):
# TODO: Think about the inputs and outputs
# TODO: Manage the template of this file
# renderer = pystache.Renderer(search_dirs="templates/hubDescription")
# t = Template(templates.hubDescription.layout.html)
- mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/hubDescription')],
+ mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/hubDescription')],
output_encoding='utf-8', encoding_errors='replace')
mytemplate = mylookup.get_template("layout.txt")
with open(hubHtmlFilePath, 'w') as hubHtmlFile:
- # Write the content of the file genomes.txt
- # htmlPystached = renderer.render_name(
- # "layout",
- # {'specie': 'Dbia',
- # 'toolUsed': 'Augustus',
- # 'ncbiSpecieUrl': 'http://www.ncbi.nlm.nih.gov/genome/3499',
- # 'genomeID': '3499',
- # 'SpecieFullName': 'Drosophila biarmipes'})
htmlMakoRendered = mytemplate.render(
specie='Dbia',
toolUsed='Augustus',
@@ -185,13 +228,12 @@
genomeID='3499',
specieFullName='Drosophila biarmipes'
)
- # hubHtmlFile.write(htmlPystached)
- hubHtmlFile.write(htmlMakoRendered)
+ #hubHtmlFile.write(htmlMakoRendered)
- def __fillDescriptionHtmlFile__(self, descriptionHtmlFilePath, toolDirectory):
+ def __fillDescriptionHtmlFile__(self, descriptionHtmlFilePath):
# TODO: Think about the inputs and outputs
# TODO: Manage the template of this file
- mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/specieDescription')],
+ mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/specieDescription')],
output_encoding='utf-8', encoding_errors='replace')
mytemplate = mylookup.get_template("layout.txt")
with open(descriptionHtmlFilePath, 'w') as descriptionHtmlFile:
@@ -199,11 +241,11 @@
htmlMakoRendered = mytemplate.render(
specieDescription='This is the description of the dbia',
)
- descriptionHtmlFile.write(htmlMakoRendered)
+ #descriptionHtmlFile.write(htmlMakoRendered)
- def __fillGroupsTxtFile__(self, groupsTxtFilePath, toolDirectory):
+ def __fillGroupsTxtFile__(self, groupsTxtFilePath):
# TODO: Reenable this function at some point
- mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, 'templates/groupsTxt')],
+ mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, 'templates/groupsTxt')],
output_encoding='utf-8', encoding_errors='replace')
mytemplate = mylookup.get_template("layout.txt")
with open(groupsTxtFilePath, 'w') as groupsTxtFile:
diff -r 4f9847539a28 -r acc233161f50 TrackHub.pyc
Binary file TrackHub.pyc has changed
diff -r 4f9847539a28 -r acc233161f50 hubArchiveCreator.py
--- a/hubArchiveCreator.py Wed Jul 20 12:29:08 2016 -0400
+++ b/hubArchiveCreator.py Thu Jul 21 05:58:51 2016 -0400
@@ -14,13 +14,14 @@
import sys
# Internal dependencies
-from TrackHub import TrackHub
-from Gff3 import Gff3
from Bam import Bam
from BedSimpleRepeats import BedSimpleRepeats
from Bed import Bed
from BigWig import BigWig
+from util.Fasta import Fasta
+from Gff3 import Gff3
from Gtf import Gtf
+from TrackHub import TrackHub
# TODO: Verify each subprocessed dependency is accessible [gff3ToGenePred, genePredToBed, twoBitInfo, faToTwoBit, bedToBigBed, sort
@@ -62,6 +63,10 @@
parser.add_argument('-j', '--data_json', help='Json containing the metadata of the inputs')
+ parser.add_argument('--user_email', help='Email of the user who launched the Hub Archive Creation')
+
+ parser.add_argument('--genome_name', help='UCSC Genome Browser assembly ID')
+
ucsc_tools_path = ''
toolDirectory = '.'
@@ -70,11 +75,20 @@
# Get the args passed in parameter
args = parser.parse_args()
- input_fasta_file = args.fasta
+ array_inputs_reference_genome = json.loads(args.fasta)
+
+ # TODO: Replace these with the object Fasta
+ input_fasta_file = array_inputs_reference_genome["false_path"]
+ input_fasta_file_name = sanitize_name_input(array_inputs_reference_genome["name"])
+ genome_name = sanitize_name_input(args.genome_name)
+
+ reference_genome = Fasta(array_inputs_reference_genome["false_path"],
+ input_fasta_file_name, genome_name)
+
+ user_email = args.user_email
# TODO: Add array for each input because we can add multiple -b for example + filter the data associated
-
array_inputs_gff3 = args.gff3
array_inputs_bed_simple_repeats = args.bedSimpleRepeats
array_inputs_bed_generic = args.bed
@@ -96,42 +110,38 @@
if args.extra_files_path:
extra_files_path = args.extra_files_path
- # TODO: Check here all the binaries / tools we need. Exception is missing
+ # TODO: Check here all the binaries / tools we need. Exception if missing
# Create the Track Hub folder
- trackHub = TrackHub(input_fasta_file, outputFile, extra_files_path, toolDirectory)
+ trackHub = TrackHub(reference_genome, user_email, outputFile, extra_files_path, toolDirectory)
all_datatype_dictionary = {}
+ datatype_parameters = (inputs_data, all_datatype_dictionary)
+
# Process Augustus
if array_inputs_gff3:
- create_ordered_datatype_objects(Gff3, array_inputs_gff3, inputs_data, input_fasta_file,
- extra_files_path, all_datatype_dictionary, toolDirectory)
+ create_ordered_datatype_objects(Gff3, array_inputs_gff3, *datatype_parameters)
- # Process Bed simple repeats => From Tandem Repeats Finder / TrfBig
+ # Process Bed simple repeats
if array_inputs_bed_simple_repeats:
- create_ordered_datatype_objects(BedSimpleRepeats, array_inputs_bed_simple_repeats, inputs_data, input_fasta_file,
- extra_files_path, all_datatype_dictionary, toolDirectory)
+ create_ordered_datatype_objects(BedSimpleRepeats, array_inputs_bed_simple_repeats, *datatype_parameters)
- # Process a Bed => tBlastN or TopHat
+ # Process Bed
if array_inputs_bed_generic:
- create_ordered_datatype_objects(Bed, array_inputs_bed_generic, inputs_data, input_fasta_file,
- extra_files_path, all_datatype_dictionary, toolDirectory)
+ create_ordered_datatype_objects(Bed, array_inputs_bed_generic, *datatype_parameters)
- # Process a GTF => Tophat
+ # Process GTF
if array_inputs_gtf:
- create_ordered_datatype_objects(Gtf, array_inputs_gtf, inputs_data, input_fasta_file,
- extra_files_path, all_datatype_dictionary, toolDirectory)
+ create_ordered_datatype_objects(Gtf, array_inputs_gtf, *datatype_parameters)
- # Process a Bam => Tophat
+ # Process Bam
if array_inputs_bam:
- create_ordered_datatype_objects(Bam, array_inputs_bam, inputs_data, input_fasta_file,
- extra_files_path, all_datatype_dictionary, toolDirectory)
+ create_ordered_datatype_objects(Bam, array_inputs_bam, *datatype_parameters)
- # Process a BigWig => From Bam
+ # Process BigWig
if array_inputs_bigwig:
- create_ordered_datatype_objects(BigWig, array_inputs_bigwig, inputs_data, input_fasta_file,
- extra_files_path, all_datatype_dictionary, toolDirectory)
+ create_ordered_datatype_objects(BigWig, array_inputs_bigwig, *datatype_parameters)
# Create Ordered Dictionary to add the tracks in the tool form order
all_datatype_ordered_dictionary = collections.OrderedDict(all_datatype_dictionary)
@@ -147,6 +157,10 @@
sys.exit(0)
+def sanitize_name_input(string_to_sanitize):
+ return string_to_sanitize \
+ .replace("/", "_") \
+ .replace(" ", "_")
def sanitize_name_inputs(inputs_data):
"""
@@ -156,22 +170,16 @@
:return:
"""
for key in inputs_data:
- inputs_data[key]["name"] = inputs_data[key]["name"]\
- .replace("/", "_")\
- .replace(" ", "_")
+ inputs_data[key]["name"] = sanitize_name_input(inputs_data[key]["name"])
-def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data, input_fasta_file,
- extra_files_path, all_datatype_dictionary, tool_directory):
+def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data, all_datatype_dictionary):
"""
Function which executes the creation all the necessary files / folders for a special Datatype, for TrackHub
and update the dictionary of datatype
:param ExtensionClass: T <= Datatype
:param array_inputs: list[string]
:param inputs_data:
- :param input_fasta_file: string
- :param extra_files_path: string
- :param tool_directory; string
"""
datatype_dictionary = {}
@@ -180,8 +188,8 @@
for input_false_path in array_inputs:
for key, data_value in inputs_data.items():
if key == input_false_path:
- extensionObject = ExtensionClass(input_false_path, data_value,
- input_fasta_file, extra_files_path, tool_directory)
+ extensionObject = ExtensionClass(input_false_path, data_value)
+
datatype_dictionary.update({data_value["order_index"]: extensionObject})
all_datatype_dictionary.update(datatype_dictionary)
diff -r 4f9847539a28 -r acc233161f50 hubArchiveCreator.xml
--- a/hubArchiveCreator.xml Wed Jul 20 12:29:08 2016 -0400
+++ b/hubArchiveCreator.xml Thu Jul 21 05:58:51 2016 -0400
@@ -31,6 +31,9 @@
mkdir -p $output.extra_files_path;
python $__tool_directory__/hubArchiveCreator.py
+ ## Ask the user to enter the genome name
+ --genome_name '$genome_name'
+
#import json
#set global data_parameter_dict = {}
@@ -81,19 +84,32 @@
#end if
#end for
+ ## We combine the fasta file dataset name with its false path in a JSON object
+ #set fasta_json = json.dumps({"false_path": str($fasta_file), "name": $fasta_file.name})
+ -f '$fasta_json'
+
## Dump the final json
#set all_data_json = json.dumps($data_parameter_dict)
- -f $Fasta_File
--data_json '$all_data_json'
+ ## Retrieve the user email
+ --user_email $__user_email__
+
-d $__tool_directory__ -e $output.files_path -o $output;
]]>
+
@@ -175,7 +191,7 @@
-
+
@@ -209,7 +225,7 @@
-
+