Previous changeset 9:4f9847539a28 (2016-07-20) Next changeset 11:d05236b15f81 (2016-07-27) |
Commit message:
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 1b1063f90004764bcf504f4340738eca5c4b1f9d |
modified:
Bam.py Bam.pyc Bed.py Bed.pyc BedSimpleRepeats.py BedSimpleRepeats.pyc BigWig.py BigWig.pyc Datatype.py Datatype.pyc Gff3.py Gff3.pyc Gtf.py Gtf.pyc TrackHub.py TrackHub.pyc hubArchiveCreator.py hubArchiveCreator.xml util/subtools.py util/subtools.pyc |
added:
templates/display.txt util/Fasta.py util/Fasta.pyc |
removed:
templates/display.html |
b |
diff -r 4f9847539a28 -r acc233161f50 Bam.py --- a/Bam.py Wed Jul 20 12:29:08 2016 -0400 +++ b/Bam.py Thu Jul 21 05:58:51 2016 -0400 |
b |
@@ -15,12 +15,8 @@ class Bam( Datatype ): - def __init__( self, input_bam_false_path, data_bam , - inputFastaFile, extra_files_path, tool_directory ): - super(Bam, self).__init__( input_fasta_file=inputFastaFile, - extra_files_path=extra_files_path, - tool_directory=tool_directory, - ) + def __init__(self, input_bam_false_path, data_bam): + super(Bam, self).__init__() self.track = None |
b |
diff -r 4f9847539a28 -r acc233161f50 Bam.pyc |
b |
Binary file Bam.pyc has changed |
b |
diff -r 4f9847539a28 -r acc233161f50 Bed.py --- a/Bed.py Wed Jul 20 12:29:08 2016 -0400 +++ b/Bed.py Thu Jul 21 05:58:51 2016 -0400 |
[ |
@@ -11,19 +11,14 @@ class Bed( Datatype ): - def __init__( self, inputBedGeneric, data_bed_generic, - inputFastaFile, extra_files_path, tool_directory ): - super(Bed, self).__init__( - inputFastaFile, extra_files_path, tool_directory - ) + def __init__( self, inputBedGeneric, data_bed_generic): + super(Bed, self).__init__() self.track = None self.inputBedGeneric = inputBedGeneric self.sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed") - self.chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes") - self.twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0) self.data_bed_generic = data_bed_generic self.name_bed_generic = self.data_bed_generic["name"] @@ -32,15 +27,6 @@ # Sort processing subtools.sort(self.inputBedGeneric, self.sortedBedFile.name) - # Generate the chrom.sizes - # TODO: Isolate in a function - # We first get the twoBit Infos - subtools.twoBitInfo(self.twoBitFile.name, self.twoBitInfoFile.name) - - # Then we get the output to inject into the sort - # TODO: Check if no errors - subtools.sortChromSizes(self.twoBitInfoFile.name, self.chromSizesFile.name) - # bedToBigBed processing # TODO: Change the name of the bb, to tool + genome + possible adding if multiple + .bb trackName = "".join( ( self.name_bed_generic, ".bb") ) |
b |
diff -r 4f9847539a28 -r acc233161f50 Bed.pyc |
b |
Binary file Bed.pyc has changed |
b |
diff -r 4f9847539a28 -r acc233161f50 BedSimpleRepeats.py --- a/BedSimpleRepeats.py Wed Jul 20 12:29:08 2016 -0400 +++ b/BedSimpleRepeats.py Thu Jul 21 05:58:51 2016 -0400 |
[ |
@@ -10,40 +10,26 @@ class BedSimpleRepeats( Datatype ): - def __init__( self, input_bed_simple_repeats_false_path, data_bed_simple_repeats, - input_fasta_file, extra_files_path, tool_directory ): + def __init__(self, input_bed_simple_repeats_false_path, data_bed_simple_repeats): - super(BedSimpleRepeats, self).__init__( - input_fasta_file, extra_files_path, tool_directory - ) + super(BedSimpleRepeats, self).__init__() self.input_bed_simple_repeats_false_path = input_bed_simple_repeats_false_path self.name_bed_simple_repeats = data_bed_simple_repeats["name"] self.priority = data_bed_simple_repeats["order_index"] sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed") - twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0) - chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes") # Sort processing subtools.sort(self.input_bed_simple_repeats_false_path, sortedBedFile.name) - # TODO: Regroup in an mother class which handles the Chrom.sizes creation with Gff3 and Gtf - # Generate the chrom.sizes - - subtools.twoBitInfo(self.twoBitFile.name, twoBitInfoFile.name) - - # Then we get the output to inject into the sort - # TODO: Check if no errors - subtools.sortChromSizes(twoBitInfoFile.name, chromSizesFile.name) - # bedToBigBed processing # TODO: Change the name of the bb, to tool + genome + .bb trackName = "".join( ( self.name_bed_simple_repeats, '.bb' ) ) myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName) auto_sql_option = "%s%s" % ('-as=', os.path.join(self.tool_directory, 'trf_simpleRepeat.as')) with open(myBigBedFilePath, 'w') as bigBedFile: - subtools.bedToBigBed(sortedBedFile.name, chromSizesFile.name, bigBedFile.name, + subtools.bedToBigBed(sortedBedFile.name, self.chromSizesFile.name, bigBedFile.name, typeOption='-type=bed4+12', autoSql=auto_sql_option) |
b |
diff -r 4f9847539a28 -r acc233161f50 BedSimpleRepeats.pyc |
b |
Binary file BedSimpleRepeats.pyc has changed |
b |
diff -r 4f9847539a28 -r acc233161f50 BigWig.py --- a/BigWig.py Wed Jul 20 12:29:08 2016 -0400 +++ b/BigWig.py Thu Jul 21 05:58:51 2016 -0400 |
b |
@@ -10,11 +10,8 @@ class BigWig( Datatype ): - def __init__(self, input_bigwig_path, data_bigwig, - input_fasta_path, extra_files_path, tool_directory): - super(BigWig, self).__init__( - input_fasta_path, extra_files_path, tool_directory - ) + def __init__(self, input_bigwig_path, data_bigwig): + super(BigWig, self).__init__() self.track = None |
b |
diff -r 4f9847539a28 -r acc233161f50 BigWig.pyc |
b |
Binary file BigWig.pyc has changed |
b |
diff -r 4f9847539a28 -r acc233161f50 Datatype.py --- a/Datatype.py Wed Jul 20 12:29:08 2016 -0400 +++ b/Datatype.py Thu Jul 21 05:58:51 2016 -0400 |
[ |
@@ -6,6 +6,7 @@ """ import os +import tempfile from util import subtools @@ -14,32 +15,54 @@ twoBitFile = None - def __init__( self, input_fasta_file, extra_files_path, tool_directory ): + input_fasta_file = None + extra_files_path = None + tool_directory = None - self.input_fasta_file = input_fasta_file - self.extra_files_path = extra_files_path - self.tool_directory = tool_directory + mySpecieFolderPath = None + myTrackFolderPath = None + + twoBitFile = None + chromSizesFile = None - self.twoBitFile = None + def __init__(self): - # Construction of the arborescence - # TODO: Change the hard-coded path with a input based one - self.mySpecieFolderPath = os.path.join(extra_files_path, "myHub", "dbia3") + not_init_message = "The {0} is not initialized." \ + "Did you use pre_init static method first?" + if Datatype.input_fasta_file is None: + raise TypeError(not_init_message.format('reference genome')) + if Datatype.extra_files_path is None: + raise TypeError(not_init_message.format('track Hub path')) + if Datatype.tool_directory is None: + raise TypeError(not_init_message.format('tool directory')) + - # TODO: Refactor the name of the folder "tracks" into one variable, and should be inside TrackHub object - self.myTrackFolderPath = os.path.join(self.mySpecieFolderPath, "tracks") + @staticmethod + def pre_init(reference_genome, two_bit_path, chrom_sizes_file, + extra_files_path, tool_directory, specie_folder, tracks_folder): + Datatype.extra_files_path = extra_files_path + Datatype.tool_directory = tool_directory - # TODO: Redundant, should be refactored because they are all doing it...into hubArchiveCreator? + # TODO: All this should be in TrackHub and not in Datatype + Datatype.mySpecieFolderPath = specie_folder + Datatype.myTrackFolderPath = tracks_folder + + Datatype.input_fasta_file = reference_genome + # 2bit file creation from input fasta - if not Datatype.twoBitFile: - print "We create the self.twoBit in " + self.__class__.__name__ - Datatype.twoBitFile = subtools.faToTwoBit(self.input_fasta_file, self.mySpecieFolderPath) + Datatype.twoBitFile = two_bit_path + Datatype.chromSizesFile = chrom_sizes_file - # TODO: Remove this by saying to all children classes to use "Datatype.twoBitFile" instead - self.twoBitFile = Datatype.twoBitFile + @staticmethod + def get_largest_scaffold_name(self): + # We can get the biggest scaffold here, with chromSizesFile + with open(Datatype.chromSizesFile.name, 'r') as chrom_sizes: + # TODO: Check if exists + return chrom_sizes.readline().split()[0] + # TODO: Rename for PEP8 def getShortName( self, name_to_shortify ): # Slice to get from Long label the short label short_label_slice = slice(0, 15) - return name_to_shortify[short_label_slice] \ No newline at end of file + return name_to_shortify[short_label_slice] |
b |
diff -r 4f9847539a28 -r acc233161f50 Datatype.pyc |
b |
Binary file Datatype.pyc has changed |
b |
diff -r 4f9847539a28 -r acc233161f50 Gff3.py --- a/Gff3.py Wed Jul 20 12:29:08 2016 -0400 +++ b/Gff3.py Thu Jul 21 05:58:51 2016 -0400 |
b |
@@ -11,11 +11,8 @@ class Gff3( Datatype ): - def __init__( self, input_Gff3_false_path, data_gff3, - input_fasta_false_path, extra_files_path, tool_directory ): - super( Gff3, self ).__init__( - input_fasta_false_path, extra_files_path, tool_directory - ) + def __init__(self, input_Gff3_false_path, data_gff3): + super( Gff3, self ).__init__() self.track = None @@ -29,8 +26,6 @@ sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed") # TODO: Refactor into another Class to manage the twoBitInfo and ChromSizes (same process as in Gtf.py) - twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0) - chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes") # gff3ToGenePred processing subtools.gff3ToGenePred(self.input_Gff3_false_path, genePredFile.name) @@ -42,19 +37,14 @@ # Sort processing subtools.sort(unsortedBedFile.name, sortedBedFile.name) - # Generate the twoBitInfo - subtools.twoBitInfo(self.twoBitFile.name, twoBitInfoFile.name) - - # Then we get the output to generate the chromSizes # TODO: Check if no errors - subtools.sortChromSizes(twoBitInfoFile.name, chromSizesFile.name) # bedToBigBed processing # TODO: Change the name of the bb, to tool + genome + possible adding if multiple + .bb trackName = "".join( (self.name_gff3, ".bb" ) ) myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName) with open(myBigBedFilePath, 'w') as bigBedFile: - subtools.bedToBigBed(sortedBedFile.name, chromSizesFile.name, bigBedFile.name) + subtools.bedToBigBed(sortedBedFile.name, self.chromSizesFile.name, bigBedFile.name) # Create the Track Object dataURL = "tracks/%s" % trackName |
b |
diff -r 4f9847539a28 -r acc233161f50 Gff3.pyc |
b |
Binary file Gff3.pyc has changed |
b |
diff -r 4f9847539a28 -r acc233161f50 Gtf.py --- a/Gtf.py Wed Jul 20 12:29:08 2016 -0400 +++ b/Gtf.py Thu Jul 21 05:58:51 2016 -0400 |
b |
@@ -11,11 +11,9 @@ class Gtf( Datatype ): - def __init__( self, input_gtf_false_path, data_gtf, - input_fasta_file, extra_files_path, tool_directory ): - super(Gtf, self).__init__( input_fasta_file=input_fasta_file, - extra_files_path=extra_files_path, - tool_directory=tool_directory ) + def __init__( self, input_gtf_false_path, data_gtf): + + super(Gtf, self).__init__() self.track = None @@ -30,9 +28,6 @@ unsortedBedFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".unsortedBed") sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed") - twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0) - chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes") - # GtfToGenePred subtools.gtfToGenePred(self.input_gtf_false_path, genePredFile.name) @@ -43,20 +38,12 @@ # Sort processing subtools.sort(unsortedBedFile.name, sortedBedFile.name) - # TODO: Chehck if the twoBitInfo / ChromSizes is redundant and make an intermediate class - # Generate the twoBitInfo - subtools.twoBitInfo(self.twoBitFile.name, twoBitInfoFile.name) - - # Then we get the output to generate the chromSizes - # TODO: Check if no errors - subtools.sortChromSizes(twoBitInfoFile.name, chromSizesFile.name) - # bedToBigBed processing # TODO: Change the name of the bb, to tool + genome + possible adding if multiple + .bb trackName = "".join( ( self.name_gtf, ".bb") ) myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName) with open(myBigBedFilePath, 'w') as bigBedFile: - subtools.bedToBigBed(sortedBedFile.name, chromSizesFile.name, bigBedFile.name) + subtools.bedToBigBed(sortedBedFile.name, self.chromSizesFile.name, bigBedFile.name) # Create the Track Object dataURL = "tracks/%s" % trackName |
b |
diff -r 4f9847539a28 -r acc233161f50 Gtf.pyc |
b |
Binary file Gtf.pyc has changed |
b |
diff -r 4f9847539a28 -r acc233161f50 TrackHub.py --- a/TrackHub.py Wed Jul 20 12:29:08 2016 -0400 +++ b/TrackHub.py Thu Jul 21 05:58:51 2016 -0400 |
[ |
b'@@ -2,21 +2,35 @@\n # -*- coding: utf8 -*-\n \n import os\n+import tempfile\n+import shutil\n import zipfile\n \n+# Internal dependencies\n+from Datatype import Datatype\n+from util import subtools\n+\n from mako.lookup import TemplateLookup\n \n \n class TrackHub(object):\n """docstring for TrackHub"""\n \n- def __init__(self, inputFastaFile, outputFile, extra_files_path, tool_directory):\n+ def __init__(self, inputFastaFile, user_email, outputFile, extra_files_path, tool_directory):\n super(TrackHub, self).__init__()\n \n self.rootAssemblyHub = None\n+\n self.mySpecieFolderPath = None\n+ self.myTracksFolderPath = None\n self.tool_directory = tool_directory\n \n+ self.reference_genome = inputFastaFile\n+ # TODO: Add the specie name\n+ self.genome_name = inputFastaFile.assembly_id\n+ self.default_pos = None\n+ self.user_email = user_email\n+\n # TODO: Modify according to the files passed in parameter\n mylookup = TemplateLookup(directories=[os.path.join(tool_directory, \'templates/trackDb\')],\n output_encoding=\'utf-8\', encoding_errors=\'replace\')\n@@ -25,17 +39,21 @@\n self.extra_files_path = extra_files_path\n self.outputFile = outputFile\n \n- inputFastaFile = open(inputFastaFile, \'r\')\n- #self.outputZip = zipfile.ZipFile(os.path.join(extra_files_path, \'myHub.zip\'), \'w\', allowZip64=True)\n-\n # Create the structure of the Assembly Hub\n # TODO: Merge the following processing into a function as it is also used in twoBitCreator\n- baseNameFasta = os.path.basename(inputFastaFile.name)\n- suffixTwoBit, extensionTwoBit = os.path.splitext(baseNameFasta)\n- self.twoBitName = suffixTwoBit + \'.2bit\'\n+ self.twoBitName = None\n+ self.two_bit_final_path = None\n+ self.chromSizesFile = None\n+\n+ self.default_pos = None\n \n- self.rootAssemblyHub = self.__createAssemblyHub__(toolDirectory=tool_directory,\n- extra_files_path=extra_files_path)\n+ # Set all the missing variables of this class, and create physically the folders/files\n+ self.rootAssemblyHub = self.__createAssemblyHub__(extra_files_path=extra_files_path)\n+\n+ # Init the Datatype\n+ Datatype.pre_init(self.reference_genome, self.two_bit_final_path, self.chromSizesFile,\n+ self.extra_files_path, self.tool_directory,\n+ self.mySpecieFolderPath, self.myTracksFolderPath)\n \n def createZip(self):\n for root, dirs, files in os.walk(self.rootAssemblyHub):\n@@ -60,124 +78,149 @@\n \n def terminate(self):\n # Just a test to output a simple HTML\n+ # TODO: Create a class to handle the file object\n+ mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, \'templates\')],\n+ output_encoding=\'utf-8\', encoding_errors=\'replace\')\n+\n+ mytemplate = mylookup.get_template(\'display.txt\')\n with open(self.outputFile, \'w\') as htmlOutput:\n- htmlOutput.write(\'<html>\')\n- htmlOutput.write(\'<body>\')\n- htmlOutput.write(\'<p>\')\n- htmlOutput.write(\'The following has been generated by Hub Archive Creator:\')\n- htmlOutput.write(\'</p>\')\n- htmlOutput.write(\'<ul>\')\n+ # TODO: We are basically looping two times: One time with os.walk, Second time\n+ # with the template. We could improve that if the number of files begins to be really important\n+ list_relative_file_path = [ ]\n for root, dirs, files in os.walk(self.extra_files_path):\n for file in files:\n- relDir = os.path.relpath(root, self.extra_files_path)\n- htmlOutput.write(str.format(\'<li><a href="{0}">{1}</a></li>\', os.path.join(relDir, file),\n- os.path.join(relDi'..b' output_encoding=\'utf-8\', encoding_errors=\'replace\')\n mytemplate = mylookup.get_template(\'layout.txt\')\n with open(hubTxtFilePath, \'w\') as genomesTxtFile:\n # Write the content of the file genomes.txt\n htmlMakoRendered = mytemplate.render(\n- hubName=\'dbiaOnly\',\n- shortLabel=\'dbia\',\n- longLabel=\'This hub only contains dbia with the gene predictions\',\n+ hubName=(\'\'.join([\'gonramp\', self.genome_name.title()])),\n+ shortLabel=self.genome_name,\n+ longLabel=self.genome_name,\n genomesFile=\'genomes.txt\',\n- email=\'rmarenco@gwu.edu\',\n+ email=self.user_email,\n descriptionUrl=\'dbia.html\'\n )\n genomesTxtFile.write(htmlMakoRendered)\n \n- def __fillHubHtmlFile__(self, hubHtmlFilePath, toolDirectory):\n+ def __fillHubHtmlFile__(self, hubHtmlFilePath):\n # TODO: Think about the inputs and outputs\n # TODO: Manage the template of this file\n # renderer = pystache.Renderer(search_dirs="templates/hubDescription")\n # t = Template(templates.hubDescription.layout.html)\n- mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, \'templates/hubDescription\')],\n+ mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, \'templates/hubDescription\')],\n output_encoding=\'utf-8\', encoding_errors=\'replace\')\n mytemplate = mylookup.get_template("layout.txt")\n with open(hubHtmlFilePath, \'w\') as hubHtmlFile:\n- # Write the content of the file genomes.txt\n- # htmlPystached = renderer.render_name(\n- # "layout",\n- # {\'specie\': \'Dbia\',\n- # \'toolUsed\': \'Augustus\',\n- # \'ncbiSpecieUrl\': \'http://www.ncbi.nlm.nih.gov/genome/3499\',\n- # \'genomeID\': \'3499\',\n- # \'SpecieFullName\': \'Drosophila biarmipes\'})\n htmlMakoRendered = mytemplate.render(\n specie=\'Dbia\',\n toolUsed=\'Augustus\',\n@@ -185,13 +228,12 @@\n genomeID=\'3499\',\n specieFullName=\'Drosophila biarmipes\'\n )\n- # hubHtmlFile.write(htmlPystached)\n- hubHtmlFile.write(htmlMakoRendered)\n+ #hubHtmlFile.write(htmlMakoRendered)\n \n- def __fillDescriptionHtmlFile__(self, descriptionHtmlFilePath, toolDirectory):\n+ def __fillDescriptionHtmlFile__(self, descriptionHtmlFilePath):\n # TODO: Think about the inputs and outputs\n # TODO: Manage the template of this file\n- mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, \'templates/specieDescription\')],\n+ mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, \'templates/specieDescription\')],\n output_encoding=\'utf-8\', encoding_errors=\'replace\')\n mytemplate = mylookup.get_template("layout.txt")\n with open(descriptionHtmlFilePath, \'w\') as descriptionHtmlFile:\n@@ -199,11 +241,11 @@\n htmlMakoRendered = mytemplate.render(\n specieDescription=\'This is the description of the dbia\',\n )\n- descriptionHtmlFile.write(htmlMakoRendered)\n+ #descriptionHtmlFile.write(htmlMakoRendered)\n \n- def __fillGroupsTxtFile__(self, groupsTxtFilePath, toolDirectory):\n+ def __fillGroupsTxtFile__(self, groupsTxtFilePath):\n # TODO: Reenable this function at some point\n- mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, \'templates/groupsTxt\')],\n+ mylookup = TemplateLookup(directories=[os.path.join(self.tool_directory, \'templates/groupsTxt\')],\n output_encoding=\'utf-8\', encoding_errors=\'replace\')\n mytemplate = mylookup.get_template("layout.txt")\n with open(groupsTxtFilePath, \'w\') as groupsTxtFile:\n' |
b |
diff -r 4f9847539a28 -r acc233161f50 TrackHub.pyc |
b |
Binary file TrackHub.pyc has changed |
b |
diff -r 4f9847539a28 -r acc233161f50 hubArchiveCreator.py --- a/hubArchiveCreator.py Wed Jul 20 12:29:08 2016 -0400 +++ b/hubArchiveCreator.py Thu Jul 21 05:58:51 2016 -0400 |
[ |
@@ -14,13 +14,14 @@ import sys # Internal dependencies -from TrackHub import TrackHub -from Gff3 import Gff3 from Bam import Bam from BedSimpleRepeats import BedSimpleRepeats from Bed import Bed from BigWig import BigWig +from util.Fasta import Fasta +from Gff3 import Gff3 from Gtf import Gtf +from TrackHub import TrackHub # TODO: Verify each subprocessed dependency is accessible [gff3ToGenePred, genePredToBed, twoBitInfo, faToTwoBit, bedToBigBed, sort @@ -62,6 +63,10 @@ parser.add_argument('-j', '--data_json', help='Json containing the metadata of the inputs') + parser.add_argument('--user_email', help='Email of the user who launched the Hub Archive Creation') + + parser.add_argument('--genome_name', help='UCSC Genome Browser assembly ID') + ucsc_tools_path = '' toolDirectory = '.' @@ -70,11 +75,20 @@ # Get the args passed in parameter args = parser.parse_args() - input_fasta_file = args.fasta + array_inputs_reference_genome = json.loads(args.fasta) + + # TODO: Replace these with the object Fasta + input_fasta_file = array_inputs_reference_genome["false_path"] + input_fasta_file_name = sanitize_name_input(array_inputs_reference_genome["name"]) + genome_name = sanitize_name_input(args.genome_name) + + reference_genome = Fasta(array_inputs_reference_genome["false_path"], + input_fasta_file_name, genome_name) + + user_email = args.user_email # TODO: Add array for each input because we can add multiple -b for example + filter the data associated - array_inputs_gff3 = args.gff3 array_inputs_bed_simple_repeats = args.bedSimpleRepeats array_inputs_bed_generic = args.bed @@ -96,42 +110,38 @@ if args.extra_files_path: extra_files_path = args.extra_files_path - # TODO: Check here all the binaries / tools we need. Exception is missing + # TODO: Check here all the binaries / tools we need. Exception if missing # Create the Track Hub folder - trackHub = TrackHub(input_fasta_file, outputFile, extra_files_path, toolDirectory) + trackHub = TrackHub(reference_genome, user_email, outputFile, extra_files_path, toolDirectory) all_datatype_dictionary = {} + datatype_parameters = (inputs_data, all_datatype_dictionary) + # Process Augustus if array_inputs_gff3: - create_ordered_datatype_objects(Gff3, array_inputs_gff3, inputs_data, input_fasta_file, - extra_files_path, all_datatype_dictionary, toolDirectory) + create_ordered_datatype_objects(Gff3, array_inputs_gff3, *datatype_parameters) - # Process Bed simple repeats => From Tandem Repeats Finder / TrfBig + # Process Bed simple repeats if array_inputs_bed_simple_repeats: - create_ordered_datatype_objects(BedSimpleRepeats, array_inputs_bed_simple_repeats, inputs_data, input_fasta_file, - extra_files_path, all_datatype_dictionary, toolDirectory) + create_ordered_datatype_objects(BedSimpleRepeats, array_inputs_bed_simple_repeats, *datatype_parameters) - # Process a Bed => tBlastN or TopHat + # Process Bed if array_inputs_bed_generic: - create_ordered_datatype_objects(Bed, array_inputs_bed_generic, inputs_data, input_fasta_file, - extra_files_path, all_datatype_dictionary, toolDirectory) + create_ordered_datatype_objects(Bed, array_inputs_bed_generic, *datatype_parameters) - # Process a GTF => Tophat + # Process GTF if array_inputs_gtf: - create_ordered_datatype_objects(Gtf, array_inputs_gtf, inputs_data, input_fasta_file, - extra_files_path, all_datatype_dictionary, toolDirectory) + create_ordered_datatype_objects(Gtf, array_inputs_gtf, *datatype_parameters) - # Process a Bam => Tophat + # Process Bam if array_inputs_bam: - create_ordered_datatype_objects(Bam, array_inputs_bam, inputs_data, input_fasta_file, - extra_files_path, all_datatype_dictionary, toolDirectory) + create_ordered_datatype_objects(Bam, array_inputs_bam, *datatype_parameters) - # Process a BigWig => From Bam + # Process BigWig if array_inputs_bigwig: - create_ordered_datatype_objects(BigWig, array_inputs_bigwig, inputs_data, input_fasta_file, - extra_files_path, all_datatype_dictionary, toolDirectory) + create_ordered_datatype_objects(BigWig, array_inputs_bigwig, *datatype_parameters) # Create Ordered Dictionary to add the tracks in the tool form order all_datatype_ordered_dictionary = collections.OrderedDict(all_datatype_dictionary) @@ -147,6 +157,10 @@ sys.exit(0) +def sanitize_name_input(string_to_sanitize): + return string_to_sanitize \ + .replace("/", "_") \ + .replace(" ", "_") def sanitize_name_inputs(inputs_data): """ @@ -156,22 +170,16 @@ :return: """ for key in inputs_data: - inputs_data[key]["name"] = inputs_data[key]["name"]\ - .replace("/", "_")\ - .replace(" ", "_") + inputs_data[key]["name"] = sanitize_name_input(inputs_data[key]["name"]) -def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data, input_fasta_file, - extra_files_path, all_datatype_dictionary, tool_directory): +def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data, all_datatype_dictionary): """ Function which executes the creation all the necessary files / folders for a special Datatype, for TrackHub and update the dictionary of datatype :param ExtensionClass: T <= Datatype :param array_inputs: list[string] :param inputs_data: - :param input_fasta_file: string - :param extra_files_path: string - :param tool_directory; string """ datatype_dictionary = {} @@ -180,8 +188,8 @@ for input_false_path in array_inputs: for key, data_value in inputs_data.items(): if key == input_false_path: - extensionObject = ExtensionClass(input_false_path, data_value, - input_fasta_file, extra_files_path, tool_directory) + extensionObject = ExtensionClass(input_false_path, data_value) + datatype_dictionary.update({data_value["order_index"]: extensionObject}) all_datatype_dictionary.update(datatype_dictionary) |
b |
diff -r 4f9847539a28 -r acc233161f50 hubArchiveCreator.xml --- a/hubArchiveCreator.xml Wed Jul 20 12:29:08 2016 -0400 +++ b/hubArchiveCreator.xml Thu Jul 21 05:58:51 2016 -0400 |
b |
@@ -31,6 +31,9 @@ mkdir -p $output.extra_files_path; python $__tool_directory__/hubArchiveCreator.py + ## Ask the user to enter the genome name + --genome_name '$genome_name' + #import json #set global data_parameter_dict = {} @@ -81,19 +84,32 @@ #end if #end for + ## We combine the fasta file dataset name with his false path in a JSON object + #set fasta_json = json.dumps({"false_path": str($fasta_file), "name": $fasta_file.name}) + -f '$fasta_json' + ## Dump the final json #set all_data_json = json.dumps($data_parameter_dict) - -f $Fasta_File --data_json '$all_data_json' + ## Retrieve the user email + --user_email $__user_email__ + -d $__tool_directory__ -e $output.files_path -o $output; ]]></command> <inputs> <param + name="genome_name" + type="text" + size="30" + value="unknown" + label="UCSC Genome Browser assembly ID" + /> + <param format="fasta" - name="Fasta_File" + name="fasta_file" type="data" label="Reference genome" /> @@ -175,7 +191,7 @@ <!-- Can also use assert_command to test command --> <!-- Testing GFF3 input --> <test> - <param name="Fasta_File" value="dbia3.fa"/> + <param name="fasta_file" value="dbia3.fa"/> <repeat name="format"> <conditional name="formatChoice"> <param name="format_select" value="gff3"/> @@ -209,7 +225,7 @@ </output> </test> <test> - <param name="Fasta_File" value="dbia3.fa"/> + <param name="fasta_file" value="dbia3.fa"/> <param name="GFF3" value="augustusDbia3.gff3"/> <output name="output" file="augustusOutput.html" lines_diff="2"> <extra_files type="directory" value="myHub"/> |
b |
diff -r 4f9847539a28 -r acc233161f50 templates/display.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/templates/display.txt Thu Jul 21 05:58:51 2016 -0400 |
b |
@@ -0,0 +1,15 @@ +<%namespace name="os" module="os"/> +<html> + <body> + <p> + The following has been generated by Hub Archive Creator: + </p> + <ul> + % for relative_file_path in list_relative_file_path: + <li> + <a href="${relative_file_path}">${relative_file_path}</a> + </li> + % endfor + </ul> + </body> +</html> \ No newline at end of file |
b |
diff -r 4f9847539a28 -r acc233161f50 util/Fasta.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/util/Fasta.py Thu Jul 21 05:58:51 2016 -0400 |
b |
@@ -0,0 +1,16 @@ +#!/usr/bin/python +# -*- coding: utf8 -*- + +""" +Class describing the Fasta format +(As of the 07/20/2016, only used with the reference genome) +""" + +class Fasta(object): + def __init__(self, false_path, name, assembly_id): + self.false_path = false_path + self.name = name + + if not assembly_id: + assembly_id = "unknown" + self.assembly_id = assembly_id \ No newline at end of file |
b |
diff -r 4f9847539a28 -r acc233161f50 util/Fasta.pyc |
b |
Binary file util/Fasta.pyc has changed |
b |
diff -r 4f9847539a28 -r acc233161f50 util/subtools.py --- a/util/subtools.py Wed Jul 20 12:29:08 2016 -0400 +++ b/util/subtools.py Thu Jul 21 05:58:51 2016 -0400 |
[ |
@@ -39,20 +39,16 @@ return p -def faToTwoBit(fasta_file_name, mySpecieFolder): +def faToTwoBit(fasta_file_name, twoBitFile): """ This function call faToTwoBit UCSC tool, and return the twoBitFile :param fasta_file_name: :param mySpecieFolder: :return: """ - baseNameFasta = os.path.basename(fasta_file_name) - suffixTwoBit, extensionTwoBit = os.path.splitext(baseNameFasta) - nameTwoBit = suffixTwoBit + '.2bit' - with open(os.path.join(mySpecieFolder, nameTwoBit), 'w') as twoBitFile: - array_call = ['faToTwoBit', fasta_file_name, twoBitFile.name] - _handleExceptionAndCheckCall(array_call) + array_call = ['faToTwoBit', fasta_file_name, twoBitFile] + _handleExceptionAndCheckCall(array_call) return twoBitFile |
b |
diff -r 4f9847539a28 -r acc233161f50 util/subtools.pyc |
b |
Binary file util/subtools.pyc has changed |