Previous changeset 0:0f3bc17e5ede (2016-07-13) Next changeset 2:df9e12da0d13 (2016-07-14) |
Commit message:
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty |
added:
Bam.py Bam.pyc Bed.py Bed.pyc BedSimpleRepeats.py BedSimpleRepeats.pyc BigWig.py BigWig.pyc Datatype.py Datatype.pyc Gff3.py Gff3.pyc Gtf.py Gtf.pyc Track.py Track.pyc TrackDb.py TrackDb.pyc TrackHub.py TrackHub.pyc __init__.py hubArchiveCreator.py templates/__init__.py templates/genomesAssembly/__init__.py templates/genomesAssembly/layout.txt templates/groupsTxt/__init__.py templates/groupsTxt/layout.txt templates/hubDescription/__init__.py templates/hubDescription/layout.txt templates/hubTxt/__init__.py templates/hubTxt/layout.txt templates/specieDescription/__init__.py templates/specieDescription/layout.txt templates/trackDb/__init__.py templates/trackDb/layout.txt test-data/augustus/hubaInputs/GTF/augustusDbia3.gtf test-data/augustus/hubaInputs/GTF/dbia3.fa test-data/augustus/output/augustusDbia3.bb test-data/augustus/workflowInputs/dbia3.fa test-data/augustusDbia3.gff3 test-data/augustusOutput.html test-data/dbia3.fa test-data/glimmerHMM_output.gff3 test-data/tblastN/dbia3.xml.bb test-data/tblastN/dbia3.xml.sorted.bed test-data/tblastN/dbia3.xml.unbb.bed test-data/tblastN/dbia3.xml.unsorted.bed test-data/tblastN/hubaInputs/tblastn_Dbia3_ci.xml.sorted.bed test-data/tblastN/readme/README.html test-data/tblastN/readme/README.md test-data/tblastN/workflowInputs/ci.pep test-data/tblastN/workflowInputs/dbia3.fa test-data/tblastN/workflowInputs/tblastn_Dbia3_ci.xml test-data/tblastN/workflowInputs/tblastn_Dbia3_ci.xml.bed test-data/tblastN/workflowInputs/tblastn_Dbia3_ci.xml.psl test-data/trfBig/hubaInputs/dbia3.fa test-data/trfBig/hubaInputs/dbia3_trfBig_sorted.bed test-data/trfBig/output/dbia3_trfBig.bb test-data/trfBig/workflowInputs/dbia3.fa test-data/trfBig/workflowInputs/dbia3.fa.txt test-data/trfBig/workflowInputs/dbia3_trfBig_unsorted.bed test-data/trfBig/workflowInputs/dbia3_trfBig_unsorted_withMaskedGenome.fa todo.md tool_dependencies.xml trackHub/README.md trackHub/datatypes_conf.xml trackHub/trackhub.xml trackHub/tracks_partial.py trf_simpleRepeat.as 
util/__init__.py util/__init__.pyc util/add_datatype.py util/cleanDirectory.py util/install_linux_binaries util/subtools.py util/subtools.pyc |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a Bam.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Bam.py Wed Jul 13 13:36:37 2016 -0400 |
[ |
#!/usr/bin/python
# -*- coding: utf8 -*-

"""
Class to handle Bam files to UCSC TrackHub
"""

import os
import shutil

from Datatype import Datatype
from Track import Track
from TrackDb import TrackDb
from util import subtools


class Bam(Datatype):
    """Publish a BAM file and its index as a track of the hub.

    Building the object copies the BAM file and its index side by side into
    ``self.myTrackFolderPath`` and stores the resulting ``Track`` in
    ``self.track``.
    """

    def __init__(self, input_bam_false_path, data_bam,
                 inputFastaFile, extra_files_path, tool_directory):
        """Copy the BAM and its index into the track folder and build the track.

        :param input_bam_false_path: path of the BAM file to copy into the hub.
        :param data_bam: mapping read for the keys ``"name"`` (track name,
            ``".bam"`` is appended to it), ``"order_index"`` (used as the
            track priority) and ``"index"`` (path of the BAM index file).
        :param inputFastaFile: forwarded to ``Datatype`` as ``input_fasta_file``.
        :param extra_files_path: forwarded to ``Datatype``.
        :param tool_directory: forwarded to ``Datatype``.
        """
        super(Bam, self).__init__(input_fasta_file=inputFastaFile,
                                  extra_files_path=extra_files_path,
                                  tool_directory=tool_directory)

        self.track = None

        self.input_bam_false_path = input_bam_false_path

        self.data_bam = data_bam
        # TODO: Check if it already contains the .bam extension / Do a function in Datatype which check the extension
        self.name_bam = self.data_bam["name"] + ".bam"
        self.priority = self.data_bam["order_index"]
        self.index_bam = self.data_bam["index"]

        # Call form with a single argument: same output under the Python 2
        # print statement, and consistent with the other prints of this file.
        print("Creating TrackHub BAM from (falsePath: %s; name: %s)"
              % (self.input_bam_false_path, self.name_bam))

        # First: Add the bam file
        # Second: Add the bam index file, in the same folder (https://genome.ucsc.edu/goldenpath/help/bam.html)

        bam_file_path = os.path.join(self.myTrackFolderPath, self.name_bam)
        shutil.copyfile(self.input_bam_false_path, bam_file_path)

        # Create and add the bam index file to the same folder
        name_index_bam = self.name_bam + ".bai"
        bam_index_file_path = os.path.join(self.myTrackFolderPath, name_index_bam)
        shutil.copyfile(self.index_bam, bam_index_file_path)

        # Create the Track Object
        dataURL = "tracks/%s" % self.name_bam

        trackDb = TrackDb(
            trackName=self.name_bam,
            longLabel=self.name_bam,
            shortLabel=self.getShortName(self.name_bam),
            trackDataURL=dataURL,
            trackType='bam',
            visibility='pack',
            priority=self.priority,
        )

        # The track file is the copied BAM itself (the file dataURL points
        # to), not its index; the other datatypes likewise pass their data file.
        self.track = Track(
            trackFile=bam_file_path,
            trackDb=trackDb,
        )

        print("- %s created in %s" % (self.name_bam, bam_file_path))
        # Report the name of the index file created in the hub, mirroring the
        # line above, rather than the path of the source index.
        print("- %s created in %s" % (name_index_bam, bam_index_file_path))
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a Bam.pyc |
b |
Binary file Bam.pyc has changed |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a Bed.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Bed.py Wed Jul 13 13:36:37 2016 -0400 |
[ |
#!/usr/bin/python

import os
import tempfile

# Internal dependencies
from Datatype import Datatype
from Track import Track
from TrackDb import TrackDb
from util import subtools


class Bed(Datatype):
    """Turn a generic BED input into a bigBed track of the hub.

    The intermediate temporary files are kept as attributes of the instance.
    The ``subtools`` helpers are defined outside this file; presumably they
    wrap the command-line tools of the same names -- confirm in util/subtools.py.
    """

    def __init__(self, inputBedGeneric, data_bed_generic,
                 inputFastaFile, extra_files_path, tool_directory):
        """Sort the BED, compute the chrom sizes, and write ``<name>.bb``.

        :param inputBedGeneric: path of the BED file to convert.
        :param data_bed_generic: mapping read for the keys ``"name"`` (base
            name of the track) and ``"order_index"`` (track priority).
        :param inputFastaFile: forwarded positionally to ``Datatype``.
        :param extra_files_path: forwarded positionally to ``Datatype``.
        :param tool_directory: forwarded positionally to ``Datatype``.
        """
        super(Bed, self).__init__(inputFastaFile, extra_files_path, tool_directory)

        self.track = None
        self.inputBedGeneric = inputBedGeneric

        # Scratch files handed to the helpers by name.
        self.sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed")
        self.chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes")
        self.twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0)

        self.data_bed_generic = data_bed_generic
        self.name_bed_generic = self.data_bed_generic["name"]
        self.priority = self.data_bed_generic["order_index"]

        # Step 1: sort the input BED
        subtools.sort(self.inputBedGeneric, self.sortedBedFile.name)

        # Step 2: chrom.sizes, derived from the twoBit infos
        # TODO: Isolate in a function
        subtools.twoBitInfo(self.twoBitFile.name, self.twoBitInfoFile.name)
        # TODO: Check if no errors
        subtools.sortChromSizes(self.twoBitInfoFile.name, self.chromSizesFile.name)

        # Step 3: bedToBigBed
        # TODO: Change the name of the bb, to tool + genome + possible adding if multiple + .bb
        track_name = self.name_bed_generic + ".bb"
        big_bed_path = os.path.join(self.myTrackFolderPath, track_name)

        # The output file is opened (created/truncated) before the conversion
        # runs, and the handle stays bound to self.bigBedFile afterwards.
        with open(big_bed_path, 'w') as self.bigBedFile:
            subtools.bedToBigBed(self.sortedBedFile.name,
                                 self.chromSizesFile.name,
                                 self.bigBedFile.name)

        # Step 4: describe the track
        self.track = Track(
            trackFile=big_bed_path,
            trackDb=TrackDb(
                trackName=track_name,
                longLabel=self.name_bed_generic,
                shortLabel=self.getShortName(self.name_bed_generic),
                trackDataURL="tracks/%s" % track_name,
                trackType='bigBed',
                visibility='dense',
                thickDrawItem='on',
                priority=self.priority,
            ),
        )

        print("- %s created in %s" % (track_name, big_bed_path))
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a Bed.pyc |
b |
Binary file Bed.pyc has changed |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a BedSimpleRepeats.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BedSimpleRepeats.py Wed Jul 13 13:36:37 2016 -0400 |
[ |
#!/usr/bin/python

import os
import tempfile

from Datatype import Datatype
from Track import Track
from TrackDb import TrackDb
from util import subtools


class BedSimpleRepeats(Datatype):
    """Turn a simple-repeats BED (bed4+12) into a bigBed track of the hub.

    The conversion uses the ``trf_simpleRepeat.as`` autoSql file located in
    the tool directory. The ``subtools`` helpers are defined outside this
    file; presumably they wrap the command-line tools of the same names --
    confirm in util/subtools.py.
    """

    def __init__(self, input_bed_simple_repeats_false_path, data_bed_simple_repeats,
                 input_fasta_file, extra_files_path, tool_directory):
        """Sort the BED, compute the chrom sizes, and write ``<name>.bb``.

        :param input_bed_simple_repeats_false_path: path of the BED file to convert.
        :param data_bed_simple_repeats: mapping read for the keys ``"name"``
            (base name of the track) and ``"order_index"`` (track priority).
        :param input_fasta_file: forwarded positionally to ``Datatype``.
        :param extra_files_path: forwarded positionally to ``Datatype``.
        :param tool_directory: forwarded positionally to ``Datatype``.
        """
        super(BedSimpleRepeats, self).__init__(
            input_fasta_file, extra_files_path, tool_directory
        )

        self.input_bed_simple_repeats_false_path = input_bed_simple_repeats_false_path
        self.name_bed_simple_repeats = data_bed_simple_repeats["name"]
        self.priority = data_bed_simple_repeats["order_index"]

        # Scratch files handed to the helpers by name.
        sorted_bed = tempfile.NamedTemporaryFile(suffix=".sortedBed")
        two_bit_info = tempfile.NamedTemporaryFile(bufsize=0)
        chrom_sizes = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes")

        # Step 1: sort the input BED
        subtools.sort(self.input_bed_simple_repeats_false_path, sorted_bed.name)

        # Step 2: chrom.sizes, derived from the twoBit infos
        # TODO: Regroup in an mother class which handles the Chrom.sizes creation with Gff3 and Gtf
        subtools.twoBitInfo(self.twoBitFile.name, two_bit_info.name)
        # TODO: Check if no errors
        subtools.sortChromSizes(two_bit_info.name, chrom_sizes.name)

        # Step 3: bedToBigBed, with the simple-repeats autoSql definition
        # TODO: Change the name of the bb, to tool + genome + .bb
        track_name = self.name_bed_simple_repeats + '.bb'
        big_bed_path = os.path.join(self.myTrackFolderPath, track_name)
        as_file_path = os.path.join(self.tool_directory, 'trf_simpleRepeat.as')
        auto_sql_option = '-as=' + as_file_path

        # The output file is opened (created/truncated) before the conversion runs.
        with open(big_bed_path, 'w') as big_bed:
            subtools.bedToBigBed(sorted_bed.name, chrom_sizes.name, big_bed.name,
                                 typeOption='-type=bed4+12',
                                 autoSql=auto_sql_option)

        # Step 4: describe the track
        self.track = Track(
            trackFile=big_bed_path,
            trackDb=TrackDb(
                trackName=track_name,
                longLabel=self.name_bed_simple_repeats,
                shortLabel=self.getShortName(self.name_bed_simple_repeats),
                trackDataURL="tracks/%s" % track_name,
                trackType='bigBed 4 +',
                visibility='dense',
                priority=self.priority,
            ),
        )

        print("- %s created in %s" % (track_name, big_bed_path))
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a BedSimpleRepeats.pyc |
b |
Binary file BedSimpleRepeats.pyc has changed |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a BigWig.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BigWig.py Wed Jul 13 13:36:37 2016 -0400 |
[ |
#!/usr/bin/python

import os
import shutil

# Internal dependencies
from Datatype import Datatype
from Track import Track
from TrackDb import TrackDb


class BigWig(Datatype):
    """Publish a bigWig file as a track of the hub.

    Building the object copies the input file into ``self.myTrackFolderPath``
    as ``<name>.bigwig`` and stores the resulting ``Track`` in ``self.track``.
    """

    def __init__(self, input_bigwig_path, data_bigwig,
                 input_fasta_path, extra_files_path, tool_directory):
        """Copy the bigWig into the track folder and build the track.

        :param input_bigwig_path: path of the bigWig file to copy into the hub.
        :param data_bigwig: mapping read for the keys ``"name"`` (base name
            of the track) and ``"order_index"`` (track priority).
        :param input_fasta_path: forwarded positionally to ``Datatype``.
        :param extra_files_path: forwarded positionally to ``Datatype``.
        :param tool_directory: forwarded positionally to ``Datatype``.
        """
        super(BigWig, self).__init__(
            input_fasta_path, extra_files_path, tool_directory
        )

        self.track = None

        self.input_bigwig_path = input_bigwig_path
        self.name_bigwig = data_bigwig["name"]
        self.priority = data_bigwig["order_index"]

        # Call form with a single argument: same output under the Python 2
        # print statement, and consistent with the other print of this file.
        print("Creating TrackHub BigWig from (falsePath: %s; name: %s)"
              % (self.input_bigwig_path, self.name_bigwig))

        trackName = "".join((self.name_bigwig, ".bigwig"))

        myBigWigFilePath = os.path.join(self.myTrackFolderPath, trackName)
        shutil.copy(self.input_bigwig_path, myBigWigFilePath)

        # Create the Track Object
        dataURL = "tracks/%s" % trackName

        # Describe the BigWig track
        trackDb = TrackDb(
            trackName=trackName,
            longLabel=self.name_bigwig,
            shortLabel=self.getShortName(self.name_bigwig),
            trackDataURL=dataURL,
            trackType='bigWig',
            visibility='full',
            priority=self.priority,
        )

        self.track = Track(
            trackFile=myBigWigFilePath,
            trackDb=trackDb,
        )

        print("- %s created in %s" % (trackName, myBigWigFilePath))
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a BigWig.pyc |
b |
Binary file BigWig.pyc has changed |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a Datatype.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Datatype.py Wed Jul 13 13:36:37 2016 -0400 |
[ |
#!/usr/bin/python
# -*- coding: utf8 -*-

"""
Super Class of the managed datatype
"""

import os

from util import subtools


class Datatype(object):
    """Common base of the datatypes: hub folder layout and 2bit genome file."""

    def __init__(self, input_fasta_file, extra_files_path, tool_directory):
        """Record the inputs, derive the hub folders, and build the 2bit file.

        :param input_fasta_file: fasta file handed to ``subtools.faToTwoBit``.
        :param extra_files_path: root under which ``myHub/dbia3`` is located.
        :param tool_directory: stored as ``self.tool_directory``.
        """
        self.input_fasta_file = input_fasta_file
        self.extra_files_path = extra_files_path
        self.tool_directory = tool_directory

        # Folder layout of the hub
        # TODO: Change the hard-coded path with a input based one
        specie_folder = os.path.join(extra_files_path, "myHub", "dbia3")
        self.mySpecieFolderPath = specie_folder

        # TODO: Refactor the name of the folder "tracks" into one variable, and should be inside TrackHub object
        self.myTrackFolderPath = os.path.join(specie_folder, "tracks")

        # TODO: Redundant, should be refactored because they are all doing it...into hubArchiveCreator?
        # 2bit file created from the input fasta; subclasses read its .name
        self.twoBitFile = subtools.faToTwoBit(self.input_fasta_file, specie_folder)

    def getShortName(self, name_to_shortify):
        """Return the first 15 items of *name_to_shortify*, used as short label."""
        return name_to_shortify[:15]
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a Datatype.pyc |
b |
Binary file Datatype.pyc has changed |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a Gff3.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Gff3.py Wed Jul 13 13:36:37 2016 -0400 |
[ |
#!/usr/bin/python

import os
import tempfile

# Internal dependencies
from Datatype import Datatype
from Track import Track
from TrackDb import TrackDb
from util import subtools


class Gff3(Datatype):
    """Turn a GFF3 annotation into a bigBed track of the hub.

    Pipeline: gff3ToGenePred, genePredToBed, sort, then bedToBigBed with the
    chrom sizes derived from the 2bit genome. The ``subtools`` helpers are
    defined outside this file; presumably they wrap the command-line tools
    of the same names -- confirm in util/subtools.py.
    """

    def __init__(self, input_Gff3_false_path, data_gff3,
                 input_fasta_false_path, extra_files_path, tool_directory):
        """Convert the GFF3 and write ``<name>.bb`` in the track folder.

        :param input_Gff3_false_path: path of the GFF3 file to convert.
        :param data_gff3: mapping read for the keys ``"name"`` (base name of
            the track) and ``"order_index"`` (track priority).
        :param input_fasta_false_path: forwarded positionally to ``Datatype``.
        :param extra_files_path: forwarded positionally to ``Datatype``.
        :param tool_directory: forwarded positionally to ``Datatype``.
        """
        super(Gff3, self).__init__(
            input_fasta_false_path, extra_files_path, tool_directory
        )

        self.track = None

        self.input_Gff3_false_path = input_Gff3_false_path
        self.name_gff3 = data_gff3["name"]
        self.priority = data_gff3["order_index"]

        # Scratch files handed to the helpers by name.
        # TODO: See if we need these temporary files as part of the generated files
        gene_pred = tempfile.NamedTemporaryFile(bufsize=0, suffix=".genePred")
        unsorted_bed = tempfile.NamedTemporaryFile(bufsize=0, suffix=".unsortedBed")
        sorted_bed = tempfile.NamedTemporaryFile(suffix=".sortedBed")

        # TODO: Refactor into another Class to manage the twoBitInfo and ChromSizes (same process as in Gtf.py)
        two_bit_info = tempfile.NamedTemporaryFile(bufsize=0)
        chrom_sizes = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes")

        # Step 1: GFF3 -> genePred
        subtools.gff3ToGenePred(self.input_Gff3_false_path, gene_pred.name)

        # TODO: From there, refactor because common use with Gtf.py
        # Step 2: genePred -> BED
        subtools.genePredToBed(gene_pred.name, unsorted_bed.name)

        # Step 3: sort the BED
        subtools.sort(unsorted_bed.name, sorted_bed.name)

        # Step 4: twoBit infos, then the chrom sizes computed from them
        subtools.twoBitInfo(self.twoBitFile.name, two_bit_info.name)
        # TODO: Check if no errors
        subtools.sortChromSizes(two_bit_info.name, chrom_sizes.name)

        # Step 5: bedToBigBed
        # TODO: Change the name of the bb, to tool + genome + possible adding if multiple + .bb
        track_name = self.name_gff3 + ".bb"
        big_bed_path = os.path.join(self.myTrackFolderPath, track_name)

        # The output file is opened (created/truncated) before the conversion runs.
        with open(big_bed_path, 'w') as big_bed:
            subtools.bedToBigBed(sorted_bed.name, chrom_sizes.name, big_bed.name)

        # Step 6: describe the track
        self.track = Track(
            trackFile=big_bed_path,
            trackDb=TrackDb(
                trackName=track_name,
                longLabel=self.name_gff3,
                shortLabel=self.getShortName(self.name_gff3),
                trackDataURL="tracks/%s" % track_name,
                trackType='bigBed 12 +',
                visibility='dense',
                priority=self.priority,
            ),
        )

        print("- %s created in %s" % (track_name, big_bed_path))
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a Gff3.pyc |
b |
Binary file Gff3.pyc has changed |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a Gtf.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Gtf.py Wed Jul 13 13:36:37 2016 -0400 |
[ |
#!/usr/bin/python

import os
import tempfile

# Internal dependencies
from Datatype import Datatype
from Track import Track
from TrackDb import TrackDb
from util import subtools


class Gtf(Datatype):
    """Turn a GTF annotation into a bigBed track of the hub.

    Pipeline: gtfToGenePred, genePredToBed, sort, then bedToBigBed with the
    chrom sizes derived from the 2bit genome. The ``subtools`` helpers are
    defined outside this file; presumably they wrap the command-line tools
    of the same names -- confirm in util/subtools.py.
    """

    def __init__(self, input_gtf_false_path, data_gtf,
                 input_fasta_file, extra_files_path, tool_directory):
        """Convert the GTF and write ``<name>.bb`` in the track folder.

        :param input_gtf_false_path: path of the GTF file to convert.
        :param data_gtf: mapping read for the keys ``"name"`` (base name of
            the track) and ``"order_index"`` (track priority).
        :param input_fasta_file: forwarded to ``Datatype``.
        :param extra_files_path: forwarded to ``Datatype``.
        :param tool_directory: forwarded to ``Datatype``.
        """
        super(Gtf, self).__init__(input_fasta_file=input_fasta_file,
                                  extra_files_path=extra_files_path,
                                  tool_directory=tool_directory)

        self.track = None

        self.input_gtf_false_path = input_gtf_false_path
        self.name_gtf = data_gtf["name"]
        self.priority = data_gtf["order_index"]

        # Call form with a single argument: same output under the Python 2
        # print statement, and consistent with the other print of this file.
        print("Creating TrackHub GTF from (falsePath: %s; name: %s)"
              % (self.input_gtf_false_path, self.name_gtf))

        # TODO: See if we need these temporary files as part of the generated files
        genePredFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".genePred")
        unsortedBedFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".unsortedBed")
        sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed")

        twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0)
        chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes")

        # GtfToGenePred
        subtools.gtfToGenePred(self.input_gtf_false_path, genePredFile.name)

        # TODO: From there, refactor because common use with Gff3.py
        # genePredToBed processing
        subtools.genePredToBed(genePredFile.name, unsortedBedFile.name)

        # Sort processing
        subtools.sort(unsortedBedFile.name, sortedBedFile.name)

        # TODO: Check if the twoBitInfo / ChromSizes is redundant and make an intermediate class
        # Generate the twoBitInfo
        subtools.twoBitInfo(self.twoBitFile.name, twoBitInfoFile.name)

        # Then we get the output to generate the chromSizes
        # TODO: Check if no errors
        subtools.sortChromSizes(twoBitInfoFile.name, chromSizesFile.name)

        # bedToBigBed processing
        # TODO: Change the name of the bb, to tool + genome + possible adding if multiple + .bb
        trackName = "".join((self.name_gtf, ".bb"))
        myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName)
        # The output file is opened (created/truncated) before the conversion runs.
        with open(myBigBedFilePath, 'w') as bigBedFile:
            subtools.bedToBigBed(sortedBedFile.name, chromSizesFile.name, bigBedFile.name)

        # Create the Track Object
        dataURL = "tracks/%s" % trackName

        trackDb = TrackDb(
            trackName=trackName,
            longLabel=self.name_gtf,
            shortLabel=self.getShortName(self.name_gtf),
            trackDataURL=dataURL,
            trackType='bigBed 12 +',
            visibility='dense',
            priority=self.priority,
        )
        self.track = Track(
            trackFile=myBigBedFilePath,
            trackDb=trackDb,
        )

        print("- %s created in %s" % (trackName, myBigBedFilePath))
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a Gtf.pyc |
b |
Binary file Gtf.pyc has changed |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a Track.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Track.py Wed Jul 13 13:36:37 2016 -0400 |
b |
#!/usr/bin/python


class Track(object):
    """Pair a track file with the TrackDb entry that describes it for the Track Hub."""

    def __init__(self, trackFile=None, trackDb=None):
        """Store both values unchanged.

        :param trackFile: value kept as ``self.trackFile`` (callers in this
            changeset pass the path of the file created in the track folder).
        :param trackDb: value kept as ``self.trackDb`` (callers in this
            changeset pass a ``TrackDb`` instance).
        """
        self.trackFile, self.trackDb = trackFile, trackDb
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a Track.pyc |
b |
Binary file Track.pyc has changed |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a TrackDb.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/TrackDb.py Wed Jul 13 13:36:37 2016 -0400 |
b |
#!/usr/bin/python

class TrackDb(object):
    """Plain holder of the settings of one track, as passed by the datatypes."""

    def __init__(self, trackName="", longLabel="", shortLabel="", trackDataURL="", trackType="", visibility="",
                 thickDrawItem='off', priority="0"):
        """Store every argument unchanged under the attribute of the same name."""
        super(TrackDb, self).__init__()

        # Identification and labels
        self.trackName = trackName
        self.shortLabel = shortLabel
        self.longLabel = longLabel

        # Data location and type
        self.trackDataURL = trackDataURL
        self.trackType = trackType

        # Display settings
        self.visibility = visibility
        self.thickDrawItem = thickDrawItem
        self.priority = priority
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a TrackDb.pyc |
b |
Binary file TrackDb.pyc has changed |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a TrackHub.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/TrackHub.py Wed Jul 13 13:36:37 2016 -0400 |
[ |
b'@@ -0,0 +1,218 @@\n+#!/usr/bin/python\n+# -*- coding: utf8 -*-\n+\n+import os\n+import zipfile\n+\n+from mako.lookup import TemplateLookup\n+\n+\n+class TrackHub(object):\n+ """docstring for TrackHub"""\n+\n+ def __init__(self, inputFastaFile, outputFile, extra_files_path, tool_directory):\n+ super(TrackHub, self).__init__()\n+\n+ self.rootAssemblyHub = None\n+ self.mySpecieFolderPath = None\n+ self.tool_directory = tool_directory\n+\n+ # TODO: Modify according to the files passed in parameter\n+ mylookup = TemplateLookup(directories=[os.path.join(tool_directory, \'templates/trackDb\')],\n+ output_encoding=\'utf-8\', encoding_errors=\'replace\')\n+ self.trackDbTemplate = mylookup.get_template("layout.txt")\n+\n+ self.extra_files_path = extra_files_path\n+ self.outputFile = outputFile\n+\n+ inputFastaFile = open(inputFastaFile, \'r\')\n+ self.outputZip = zipfile.ZipFile(os.path.join(extra_files_path, \'myHub.zip\'), \'w\')\n+\n+ # Create the structure of the Assembly Hub\n+ # TODO: Merge the following processing into a function as it is also used in twoBitCreator\n+ baseNameFasta = os.path.basename(inputFastaFile.name)\n+ suffixTwoBit, extensionTwoBit = os.path.splitext(baseNameFasta)\n+ self.twoBitName = suffixTwoBit + \'.2bit\'\n+\n+ self.rootAssemblyHub = self.__createAssemblyHub__(toolDirectory=tool_directory,\n+ extra_files_path=extra_files_path)\n+\n+ def createZip(self):\n+ for root, dirs, files in os.walk(self.rootAssemblyHub):\n+ # Get all files and construct the dir at the same time\n+ for file in files:\n+ self.outputZip.write(os.path.join(root, file))\n+\n+ self.outputZip.close()\n+\n+ def addTrack(self, trackDbObject=None):\n+ # Create the trackDb.txt file in the specie folder, if not exists\n+ # Else append the new track\n+ trackDbTxtFilePath = os.path.join(self.mySpecieFolderPath, \'trackDb.txt\')\n+\n+ # Append to trackDbTxtFilePath the trackDbTemplate populate with the newTrack object\n+ with open(trackDbTxtFilePath, \'a+\') as trackDbFile:\n+ 
trackDbs = [trackDbObject]\n+ htmlMakoRendered = self.trackDbTemplate.render(\n+ trackDbs=trackDbs\n+ )\n+ trackDbFile.write(htmlMakoRendered)\n+\n+ def terminate(self):\n+ # Just a test to output a simple HTML\n+ with open(self.outputFile, \'w\') as htmlOutput:\n+ htmlOutput.write(\'<html>\')\n+ htmlOutput.write(\'<body>\')\n+ htmlOutput.write(\'<p>\')\n+ htmlOutput.write(\'The following generated by Hub Archive Creator:\')\n+ htmlOutput.write(\'</p>\')\n+ htmlOutput.write(\'<ul>\')\n+ for root, dirs, files in os.walk(self.extra_files_path):\n+ for file in files:\n+ relDir = os.path.relpath(root, self.extra_files_path)\n+ htmlOutput.write(str.format(\'<li><a href="{0}">{1}</a></li>\', os.path.join(relDir, file),\n+ os.path.join(relDir, file)))\n+ htmlOutput.write(\'<ul>\')\n+ htmlOutput.write(\'</body>\')\n+ htmlOutput.write(\'</html>\')\n+\n+ def __createAssemblyHub__(self, toolDirectory, extra_files_path):\n+ # TODO: Manage to put every fill Function in a file dedicated for reading reasons\n+ # Create the root directory\n+ myHubPath = os.path.join(extra_files_path, "myHub")\n+ if not os.path.exists(myHubPath):\n+ os.makedirs(myHubPath)\n+\n+ # Add the genomes.txt file\n+ genomesTxtFilePath = os.path.join(myHubPath, \'genomes.txt\')\n+ self.__fillGenomesTxt__(genomesTxtFilePath, toolDirectory)\n+\n+ # Add the hub.txt file\n+ hubTxtFilePath = os.path.join(myHubPath, \'hub.txt\')\n+ self.__fillHubTxt__(hubTxtFilePath, toolDirectory)\n+\n+ '..b'ription="dbia3/description.html"\n+ )\n+ genomesTxtFile.write(htmlMakoRendered)\n+\n+ def __fillHubTxt__(self, hubTxtFilePath, toolDirectory):\n+ # TODO: Think about the inputs and outputs\n+ # TODO: Manage the template of this file\n+ mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, \'templates/hubTxt\')],\n+ output_encoding=\'utf-8\', encoding_errors=\'replace\')\n+ mytemplate = mylookup.get_template(\'layout.txt\')\n+ with open(hubTxtFilePath, \'w\') as genomesTxtFile:\n+ # Write the content of the file 
genomes.txt\n+ htmlMakoRendered = mytemplate.render(\n+ hubName=\'dbiaOnly\',\n+ shortLabel=\'dbia\',\n+ longLabel=\'This hub only contains dbia with the gene predictions\',\n+ genomesFile=\'genomes.txt\',\n+ email=\'rmarenco@gwu.edu\',\n+ descriptionUrl=\'dbia.html\'\n+ )\n+ genomesTxtFile.write(htmlMakoRendered)\n+\n+ def __fillHubHtmlFile__(self, hubHtmlFilePath, toolDirectory):\n+ # TODO: Think about the inputs and outputs\n+ # TODO: Manage the template of this file\n+ # renderer = pystache.Renderer(search_dirs="templates/hubDescription")\n+ # t = Template(templates.hubDescription.layout.html)\n+ mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, \'templates/hubDescription\')],\n+ output_encoding=\'utf-8\', encoding_errors=\'replace\')\n+ mytemplate = mylookup.get_template("layout.txt")\n+ with open(hubHtmlFilePath, \'w\') as hubHtmlFile:\n+ # Write the content of the file genomes.txt\n+ # htmlPystached = renderer.render_name(\n+ # "layout",\n+ # {\'specie\': \'Dbia\',\n+ # \'toolUsed\': \'Augustus\',\n+ # \'ncbiSpecieUrl\': \'http://www.ncbi.nlm.nih.gov/genome/3499\',\n+ # \'genomeID\': \'3499\',\n+ # \'SpecieFullName\': \'Drosophila biarmipes\'})\n+ htmlMakoRendered = mytemplate.render(\n+ specie=\'Dbia\',\n+ toolUsed=\'Augustus\',\n+ ncbiSpecieUrl=\'http://www.ncbi.nlm.nih.gov/genome/3499\',\n+ genomeID=\'3499\',\n+ specieFullName=\'Drosophila biarmipes\'\n+ )\n+ # hubHtmlFile.write(htmlPystached)\n+ hubHtmlFile.write(htmlMakoRendered)\n+\n+ def __fillDescriptionHtmlFile__(self, descriptionHtmlFilePath, toolDirectory):\n+ # TODO: Think about the inputs and outputs\n+ # TODO: Manage the template of this file\n+ mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, \'templates/specieDescription\')],\n+ output_encoding=\'utf-8\', encoding_errors=\'replace\')\n+ mytemplate = mylookup.get_template("layout.txt")\n+ with open(descriptionHtmlFilePath, \'w\') as descriptionHtmlFile:\n+ # Write the content of the file genomes.txt\n+ 
htmlMakoRendered = mytemplate.render(\n+ specieDescription=\'This is the description of the dbia\',\n+ )\n+ descriptionHtmlFile.write(htmlMakoRendered)\n+\n+ def __fillGroupsTxtFile__(self, groupsTxtFilePath, toolDirectory):\n+ # TODO: Reenable this function at some point\n+ mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, \'templates/groupsTxt\')],\n+ output_encoding=\'utf-8\', encoding_errors=\'replace\')\n+ mytemplate = mylookup.get_template("layout.txt")\n+ with open(groupsTxtFilePath, \'w\') as groupsTxtFile:\n+ # Write the content of groups.txt\n+ # groupsTxtFile.write(\'name map\')\n+ htmlMakoRendered = mytemplate.render(\n+ mapName=\'map\',\n+ labelMapping=\'Mapping\',\n+ prioriy=\'2\',\n+ isClosed=\'0\'\n+ )\n+ # groupsTxtFile.write(htmlMakoRendered)\n' |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a TrackHub.pyc |
b |
Binary file TrackHub.pyc has changed |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a hubArchiveCreator.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hubArchiveCreator.py Wed Jul 13 13:36:37 2016 -0400 |
[ |
#!/usr/bin/python
# -*- coding: utf8 -*-

"""
This Galaxy tool prepares your files so they are ready for
Assembly Hub visualization.

Program test arguments:
hubArchiveCreator.py --gff3 test-data/augustusDbia3.gff3 -f test-data/dbia3.fa -d . -u ./tools -o output.html -j '<json metadata>'

The JSON given to -j/--data_json maps each input path passed on the command
line to a dict holding at least the keys "name" and "order_index".
"""

import argparse
import collections
import json
import sys

# Internal dependencies
from TrackHub import TrackHub
from Gff3 import Gff3
from Bam import Bam
from BedSimpleRepeats import BedSimpleRepeats
from Bed import Bed
from BigWig import BigWig
from Gtf import Gtf


# TODO: Verify each subprocessed dependency is accessible [gff3ToGenePred, genePredToBed, twoBitInfo, faToTwoBit, bedToBigBed, sort]


def main(argv):
    """
    Parse the command line, create one datatype object per input file and
    build the Track Hub archive.

    :param argv: list[string], kept for the entry-point call main(sys.argv);
                 argparse reads the arguments from sys.argv itself
    :return: does not return, the process exits with status 0 on success
    """
    # Command Line parsing init
    parser = argparse.ArgumentParser(
        description='Create a Track Hub Archive (Assembly Hub) from a reference genome and its annotation files.')

    # Reference genome mandatory
    parser.add_argument('-f', '--fasta', help='Fasta file of the reference genome')

    # GFF3 Management
    parser.add_argument('--gff3', action='append', help='GFF3 format')

    # GTF Management
    parser.add_argument('--gtf', action='append', help='GTF format')

    # Bed4+12 (TrfBig)
    parser.add_argument('--bedSimpleRepeats', action='append', help='Bed4+12 format, using simpleRepeats.as')

    # Generic Bed (Blastx transformed to bed)
    parser.add_argument('--bed', action='append', help='Bed generic format')

    # BigWig Management
    parser.add_argument('--bigwig', action='append', help='BigWig format')

    # Bam Management
    parser.add_argument('--bam', action='append', help='Bam format')

    # TODO: Check if the running directory can have issues if we run the tool outside
    parser.add_argument('-d', '--directory',
                        help='Running tool directory, where to find the templates. Default is running directory')
    # NOTE: the value of -u is accepted for command line compatibility but is not used in this function
    parser.add_argument('-u', '--ucsc_tools_path',
                        help='Directory where to find the executables needed to run this tool')
    parser.add_argument('-e', '--extra_files_path',
                        help='Name, in galaxy, of the output folder. Where you would want to build the Track Hub Archive')
    parser.add_argument('-o', '--output', help='Name of the HTML summarizing the content of the Track Hub Archive')

    # Required: without it json.loads(None) would fail later with an obscure TypeError
    parser.add_argument('-j', '--data_json', required=True, help='Json containing the metadata of the inputs')

    # Get the args passed in parameter
    args = parser.parse_args()

    input_fasta_file = args.fasta
    outputFile = args.output

    # Fall back on the running directory when the options are missing or empty
    toolDirectory = args.directory or '.'
    extra_files_path = args.extra_files_path or '.'

    # The metadata is parsed once, then we remove the spaces in ["name"] of inputs_data
    inputs_data = json.loads(args.data_json)
    sanitize_name_inputs(inputs_data)

    # TODO: Check here all the binaries / tools we need. Exception is missing

    # Create the Track Hub folder
    trackHub = TrackHub(input_fasta_file, outputFile, extra_files_path, toolDirectory)

    all_datatype_dictionary = {}

    # Each datatype class with the list of inputs given for it (None when the option was not used).
    # The processing order is kept: on a duplicated order_index the last processed datatype wins.
    datatype_inputs = [
        (Gff3, args.gff3),  # Process Augustus
        (BedSimpleRepeats, args.bedSimpleRepeats),  # From Tandem Repeats Finder / TrfBig
        (Bed, args.bed),  # tBlastN or TopHat
        (Gtf, args.gtf),  # Tophat
        (Bam, args.bam),  # Tophat
        (BigWig, args.bigwig),  # From Bam
    ]
    # TODO: Add array for each input because we can add multiple -b for example + filter the data associated
    for ExtensionClass, array_inputs in datatype_inputs:
        if array_inputs:
            create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data, input_fasta_file,
                                            extra_files_path, all_datatype_dictionary, toolDirectory)

    # Create Ordered Dictionary to add the tracks in the tool form order.
    # A plain dict has no guaranteed order, so the entries are sorted on their order_index first.
    all_datatype_ordered_dictionary = collections.OrderedDict(
        sorted(all_datatype_dictionary.items(), key=lambda item: item[0]))

    for datatypeObject in all_datatype_ordered_dictionary.values():
        trackHub.addTrack(datatypeObject.track.trackDb)

    # We process all the modifications to create the zip file
    trackHub.createZip()

    # We terminate the process and so create a HTML file summarizing all the files
    trackHub.terminate()

    sys.exit(0)
def sanitize_name_inputs(inputs_data):
    """
    Sometimes output from Galaxy, or even just file names from the user, have spaces.
    Every space found in the "name" of each entry is replaced by an underscore.

    :param inputs_data: dict[string, dict[string, string]]
    :return: None, inputs_data is modified in place
    """
    for data_value in inputs_data.values():
        data_value["name"] = data_value["name"].replace(" ", "_")


def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data, input_fasta_file,
                                    extra_files_path, all_datatype_dictionary, tool_directory):
    """
    Function which executes the creation of all the necessary files / folders for a special Datatype,
    for TrackHub, and updates the dictionary of datatypes.
    Inputs without an entry in inputs_data are skipped.

    :param ExtensionClass: T <= Datatype
    :param array_inputs: list[string]
    :param inputs_data: dict[string, dict], metadata of the inputs, indexed by input path.
                        Each entry used here must hold the key "order_index"
    :param input_fasta_file: string
    :param extra_files_path: string
    :param all_datatype_dictionary: dict, updated in place with order_index => created object
    :param tool_directory: string
    :return: None
    """

    datatype_dictionary = {}

    for input_false_path in array_inputs:
        # Direct lookup of the metadata instead of scanning all of inputs_data for each input
        if input_false_path not in inputs_data:
            continue
        data_value = inputs_data[input_false_path]
        extensionObject = ExtensionClass(input_false_path, data_value,
                                         input_fasta_file, extra_files_path, tool_directory)
        datatype_dictionary[data_value["order_index"]] = extensionObject

    # Merged only once every object has been created
    all_datatype_dictionary.update(datatype_dictionary)


if __name__ == "__main__":
    main(sys.argv)
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a templates/genomesAssembly/layout.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/templates/genomesAssembly/layout.txt Wed Jul 13 13:36:37 2016 -0400 |
b |
@@ -0,0 +1,11 @@ +## TODO: Add a loop to be able to put this template for each genome +genome ${genomeName} +trackDb ${trackDbPath} +groups ${groupsPath} +description ${genomeDescription} +twoBitPath ${twoBitPath} +organism ${organismName} +defaultPos ${defaultPosition} +orderKey ${orderKey} +scientificName ${scientificName} +htmlPath ${pathAssemblyHtmlDescription} |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a templates/groupsTxt/layout.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/templates/groupsTxt/layout.txt Wed Jul 13 13:36:37 2016 -0400 |
b |
@@ -0,0 +1,4 @@ +name ${mapName} +label ${labelMapping} +priority ${prioriy} +defaultIsClosed ${isClosed} |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a templates/hubDescription/layout.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/templates/hubDescription/layout.txt Wed Jul 13 13:36:37 2016 -0400 |
b |
@@ -0,0 +1,6 @@ +<html> +<body> + ${specie} genome with ${toolUsed} + <a href="${ncbiSpecieUrl}">NCBI genome/${genomeID} (${specieFullName})</a> +</body> +</html> |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a templates/hubTxt/layout.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/templates/hubTxt/layout.txt Wed Jul 13 13:36:37 2016 -0400 |
b |
@@ -0,0 +1,6 @@ +hub ${hubName} +shortLabel ${shortLabel} +longLabel ${longLabel} +genomesFile ${genomesFile} +email ${email} +descriptionUrl ${descriptionUrl} |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a templates/specieDescription/layout.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/templates/specieDescription/layout.txt Wed Jul 13 13:36:37 2016 -0400 |
b |
@@ -0,0 +1,5 @@ +<html> +<body> + ${specieDescription} +</body> +</html> |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a templates/trackDb/layout.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/templates/trackDb/layout.txt Wed Jul 13 13:36:37 2016 -0400 |
b |
@@ -0,0 +1,12 @@ +% for trackDb in trackDbs: + ## See this http://genome.ucsc.edu/goldenPath/help/hgTrackHubHelp.html + track ${trackDb.trackName} + longLabel ${trackDb.longLabel} + shortLabel ${trackDb.shortLabel} + bigDataUrl ${trackDb.trackDataURL} + type ${trackDb.trackType} + visibility ${trackDb.visibility} + thickDrawItem ${trackDb.thickDrawItem} + priority ${trackDb.priority} + +% endfor |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/augustus/hubaInputs/GTF/augustusDbia3.gtf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/augustus/hubaInputs/GTF/augustusDbia3.gtf Wed Jul 13 13:36:37 2016 -0400 |
[ |
b'@@ -0,0 +1,7076 @@\n+# This output was generated with AUGUSTUS (version 3.1.0).\n+# AUGUSTUS is a gene prediction tool written by Mario Stanke (mario.stanke@uni-greifswald.de),\n+# Oliver Keller, Stefanie K\xc3\xb6nig and Lizzy Gerischer.\n+# Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008),\n+# Using native and syntenically mapped cDNA alignments to improve de novo gene finding\n+# Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013\n+# No extrinsic information on sequences given.\n+# Initialising the parameters using config directory /home/galaxy/galaxy/dependency_dir/augustus/3.1/iuc/package_augustus_3_1/24009970003a/config/ ...\n+# fly version. Using default transition matrix.\n+# Looks like /home/galaxy/galaxy/database/files/000/dataset_2.dat is in fasta format.\n+# We have hints for 0 sequences and for 0 of the sequences in the input set.\n+#\n+# ----- prediction on sequence number 1 (length = 45179, name = contig1) -----\n+#\n+# Constraints/Hints:\n+# (none)\n+# Predicted genes for sequence number 1 on both strands\n+# start gene contig1.g1\n+contig1\tAUGUSTUS\tgene\t553\t34688\t0.03\t-\t.\tcontig1.g1\n+contig1\tAUGUSTUS\ttranscript\t553\t34688\t0.03\t-\t.\tcontig1.g1.t1\n+contig1\tAUGUSTUS\tCDS\t553\t578\t0.41\t-\t2\ttranscript_id "contig1.g1.t1"; gene_id "contig1.g1";\n+contig1\tAUGUSTUS\tCDS\t652\t1047\t0.25\t-\t2\ttranscript_id "contig1.g1.t1"; gene_id "contig1.g1";\n+contig1\tAUGUSTUS\tCDS\t12541\t12968\t0.87\t-\t1\ttranscript_id "contig1.g1.t1"; gene_id "contig1.g1";\n+contig1\tAUGUSTUS\tCDS\t32202\t33826\t0.89\t-\t0\ttranscript_id "contig1.g1.t1"; gene_id "contig1.g1";\n+contig1\tAUGUSTUS\tCDS\t33880\t34044\t0.98\t-\t0\ttranscript_id "contig1.g1.t1"; gene_id "contig1.g1";\n+contig1\tAUGUSTUS\tCDS\t34107\t34619\t0.99\t-\t0\ttranscript_id "contig1.g1.t1"; gene_id "contig1.g1";\n+contig1\tAUGUSTUS\tCDS\t34674\t34688\t0.45\t-\t0\ttranscript_id "contig1.g1.t1"; gene_id 
"contig1.g1";\n+contig1\tAUGUSTUS\tstart_codon\t34686\t34688\t.\t-\t0\ttranscript_id "contig1.g1.t1"; gene_id "contig1.g1";\n+# coding sequence = [atggcagcactctcgcaaaaggactttaatgacggaaaccagagaaatcgtcagaggaaaagtactgtcactgagcagc\n+# cctcatcaacatcaggaagcgtggcccaagtagaagcggacagcgcatcgtcacatttgtctgatcgctgttataacaacatagcaagtactaccaaa\n+# agtattgttggtgatgtgaaaacaagcagacgctgtgaagactttataagtagtggatcagcttcgactccattaaatgaatatgattgtgccaacgc\n+# tgatacgacggatattaaagatgaacctggagattttatagaaacaaattgtcattggcgaagttgttgtattgaatttaatacgcaagatgagcttg\n+# ttaaacatatcaataatgaccatatccaaaccaataagaaggcctttgtctgtcgatgggaaaattgtacccgtggtgaaaaacctttcaaagcgcag\n+# tatatgctagttgtgcatatgcgtcgtcataccggcgaaaagccacataaatgcacatttgaaggctgttttaaggcatattcacgcttggaaaacct\n+# aaaaacacatttacggtcacacacaggtgaaaaaccctatatgtgcgagtatccgggatgcagcaaggcgtttagtaatgcaagcgatcgtgcaaagc\n+# atcaaaatcgtactcacagtaatgagaaaccgtacatttgtaaagctcctggttgcacgaaacgttacaccgacccaagttctttgcgtaaacatgtt\n+# aaaacagttcatggtgctgagttttatgctaataaaaaacacaagggattgcccctaaatgatgcgaactctcgcctccaccgagacagtggccaagg\n+# tcggcataatcttcaagagcataatattgactctagcccttgcagcgaagaacttcaggtgggaaaacttataggcatgtctagtcccagtattaaat\n+# ctgaatctgatgaaagttcaccacatcatcagttgttaagtggagttcgagcttcagactgttttttaacatattcacaagatggtgccgccgaacat\n+# ataactttagatgacggctgggattgtgacgatgacgttgacgtggccgacttaccaattgtcttgcgtgctatggtaaatgttggcagcggaaattc\n+# gatggccccgaccattggagggggtgttgttgcgaggcagcgatttagaagtcgtttgcaaactaaaggaataaactctagtatggttatgcttggca\n+# acatccccgaaagcaatcgcaccattggaataagcgaacttaaccagcgtataacagaacttaagatggagcctggtaccgcttgtgatattacagtt\n+# ccattgaatacggggctggaaaggatttcagaagacttatcacaaaatcaatcaaatataacattaaataagcaaagcttctttaccgcttccggctc\n+# tctccaaggacattttcgtcgcgatagccagaactccactgcaagtacatattatggtagcatgcaaagtcgccgcagtagtcaatcatctcaggtgt\n+# cttctatatctacaatgcgtccaggcccatcgtacaacacaaccacagcttccctctatgatccaatttccccaggatgctctcgacgctctagccaa\n+# atgtctaatgtagtcaactcctacgcacttacatcaacatcaggattgactgcaattaacaaggacttaaatgcaaacagcagcccaaatgcttctat\n+# 
taataaaccgggtcttggtggtcagtactttggtttttacaataacagtcttcctccacctccatcgtctcatttaattgccaccaatttgaagcatc\n+# tgcaggacacagactctaggagttgttatcacaacacaactggcggtcgattttccattcccaattgtacgccatctctacatttagactacaat'..b'agt\n+# tttcgtgttggacgaatattgcgctcgctatggtgtgcggggatgctaccgacatttatgctacctttctgatttactggatcgtgcagaaaagcaac\n+# acatgatagatccaacactaattcactattcatttgcgttttgcgcaagccacgttcacggaaatcgacctgatggggtaggaagcattacgcatgag\n+# gaaaaggaaaaattttctgaaatcaaagaacgcctacgtcagttactggagtttcaaataaccaattttagatactgttttcctttcggtcgccccga\n+# aggcgctcttaaagcgacattatctttactagagagagtgctaatgaaagacattgttacccctgttccacctgaagaagttcgtcaaatgattaaaa\n+# aaagtttagagacggcagcgcttgtaaattacacccgtctctccaataaagctaagattgaaggcacttttccgttcaagggacacgtgccagggaaa\n+# tcctatcggaatttcaaaatttttccaagcactttcctgtacacgggaaacgtcccagggaaagcctatcggaaaggtcccagggaaatcccgtcgga\n+# tattcaggatttgcgaggagaggttattgttccgcccccaaaaaaactagaggacctaattcacttagcagaactttgtgttgatctgttgcaacaaa\n+# atgaagagcactatggagaactgcgcaaacatgacaaaatggataaaattaaaatgcgtaaggaagatgatgatgtaccaaaaggccacaatgaaagc\n+# gatattgatttaaccgccaatactggactcagtagcacatcagacctggcttctgcagcatcaactaatggatcgtcatttcgttattataatttgag\n+# gaatgggcgttttcatcagcacctgcgagacacattcgcaccattagtcgtgcggtacgtggatctgatggaatcttcaatagctcagtcgattcata\n+# agggatttgaaaaagaacgctgggaaagtaaagggaacggatgtgccacctctgaagacttattttggaaactagatgctctacagtcgtttataaga\n+# gacctgcactggccagacgcagagtttcgacaacatttagaacagcgtcttaaaatgatggccgtcgatatgatagagcaatgtatacaacgaactga\n+# ttcgtcttttcagtcgtggctaaaaaaaaacattgccttcatatcaactgattatattttaccttcagaaatgtgcgctatggtcaatgtgatattag\n+# atgctaaaaatcaaagctttaaattgactactattgacggcattgatttgtataaatttcatgcaaaaattgacgaccaaatcgacaaagcgaatgta\n+# gctatgacacaaggtctaactggtaaacttatgtcagtgctagagtcgactttgtcaaaattagcacgatacgacgaaggtagcctaatcggctcgat\n+# tcttagttttacaaatgtatcgagctcgggaaaggatctcgggcaaggatatgtaaatttctttagaaataatatggatcaagtacgaggaaaaattg\n+# gcgacgatttatggaccctgaatttctttgagcagtggtactcgcagcagattaacatgctatgtaattggctttcggaacgtttggaccacgctctg\n+# 
cactacgctcaagtttcatctatttctcacattatcaagaaaatatattcagacttcgaattacaaggtgtattagaagataaattaaactctaaagc\n+# atatcaagcagtcgcacagcgaatggcgacagaggaagcgacatgtgctttgacaatgcctgatgttagcgaagatgaaccctgtgacgacattcgag\n+# aaggggaagaagaagatactggcgacgaatctacctctaacataccaaggggcttaccaaaaccaaaaattgctgccgctcaagctgctgctgttacc\n+# aacgttgttgccggccgtgtgggtaatttactcggcaaaggcattggcggccttagttcaaagttgggaagtggaagttggttttaa]\n+# protein sequence = [MIDPSSSEEEGEDDPIANVSSKGRLTHAPKGTNTVSILGGVSGPGVGSNMAISGSNGDLAGNQRQSNISSISNRNDAG\n+# NVAGVGGSSNKNEQIHGSRVDGGNLEVPNSCIPSGVSQETLNQSIGSSRANSLPRPLSPSPSLTSEKPDTGDPHAFLKGETQIMADEAFQNAVQSYHD\n+# VFLKSERVLKMVQSGASSQHDFREVFRNNIEKRVRSLPEIDGLSKETVLTSWMAKFDIILKGTGEEDSKRPSRMQQSLNSELILSKEQLYDMFQQILL\n+# VKKFEHQILYNALMLDSADEQAAAIRRELDGRMQRVGEMEKNRKLMPKFVLKEMESLYVEELKSSINLLMANLESLPVSKGNMDSKYGLQKLKRYNHR\n+# KLILRSHGSLSKLEGDSEDGSTQLTKLDVVLTFQLEVIVMEVKGLKSLAPNRIVYCTMEVENGEKLQTDQAEASKPMWDTQGDFTTTHPLPVVKVKLY\n+# TENPGMLALEDKELGKVILKPTPLSSKSPEWHRMVIPKNLPDQDIRIKIACRLDKPLNMKHCGYKEKKSEPSEMMQLDGYTVDYIEAASANLMFGIDL\n+# NGGRFFFNAVREGDSISFACDDENECSLWVMAMYRATGQSHKPTPPITQDKNSAMSKIQGARVAPDGSIFLWASFFVAAATRNSSSYHWFRVGGRLNP\n+# MVVSAAARMEWDMCAQSPLASTWRGIGWWSSSERGQPYDLVDVEVPKSGATHYHVSCRGEVNQPETVVRDGLMEAESTDGGVKDAFATELCVEVPEDK\n+# LHVVVWGWFSPGQVFVLDEYCARYGVRGCYRHLCYLSDLLDRAEKQHMIDPTLIHYSFAFCASHVHGNRPDGVGSITHEEKEKFSEIKERLRQLLEFQ\n+# ITNFRYCFPFGRPEGALKATLSLLERVLMKDIVTPVPPEEVRQMIKKSLETAALVNYTRLSNKAKIEGTFPFKGHVPGKSYRNFKIFPSTFLYTGNVP\n+# GKAYRKGPREIPSDIQDLRGEVIVPPPKKLEDLIHLAELCVDLLQQNEEHYGELRKHDKMDKIKMRKEDDDVPKGHNESDIDLTANTGLSSTSDLASA\n+# ASTNGSSFRYYNLRNGRFHQHLRDTFAPLVVRYVDLMESSIAQSIHKGFEKERWESKGNGCATSEDLFWKLDALQSFIRDLHWPDAEFRQHLEQRLKM\n+# MAVDMIEQCIQRTDSSFQSWLKKNIAFISTDYILPSEMCAMVNVILDAKNQSFKLTTIDGIDLYKFHAKIDDQIDKANVAMTQGLTGKLMSVLESTLS\n+# KLARYDEGSLIGSILSFTNVSSSGKDLGQGYVNFFRNNMDQVRGKIGDDLWTLNFFEQWYSQQINMLCNWLSERLDHALHYAQVSSISHIIKKIYSDF\n+# ELQGVLEDKLNSKAYQAVAQRMATEEATCALTMPDVSEDEPCDDIREGEEEDTGDESTSNIPRGLPKPKIAAAQAAAVTNVVAGRVGNLLGKGIGGLS\n+# SKLGSGSWF]\n+# end gene contig70.g117\n+###\n+# 
command line:\n+# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=on --codingseq=on --introns=off --stop=off --stop=off --cds=on --singlestrand=false /home/galaxy/galaxy/database/files/000/dataset_2.dat --UTR=off --genemodel=complete --species=fly\n' |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/augustus/hubaInputs/GTF/dbia3.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/augustus/hubaInputs/GTF/dbia3.fa Wed Jul 13 13:36:37 2016 -0400 |
b |
b'@@ -0,0 +1,66784 @@\n+>contig1\n+AAAACTAATTTTATCAAAATCGGACAACTATATCATATAGCTGCCATACG\n+AACGATCGGAAAATTGGTAAGTAAATAATTAAAAATATTATATCTTTGGT\n+GTGTTTTTGGAAATAACATTTTTTAATATGTTTTGAATTTTGAATTAAAT\n+TCTATCAAAATCGGACGACTATATCATATAGCTGCCATACAAACGATCGG\n+AAAATTGGTGAATAAATAATATGAAAAAATTATATCTTTGGTTATTTTTT\n+AGACAAATAACCTCCAACGCTTGGAAATAACATTTTTTAATTAGTTATAA\n+ATTTGATATTTTAATTTTATCAAAATCGGACGAATATAGCATATAGCTGT\n+TAAAATAATATGAAACAAATTATAGCTCCGGTGTTTTTTACATATTATCT\n+TATACTATTGGGAAAATAGTTTATTATATTTTAAAGAATTTCCAATTAAA\n+CTCTAACATATAGCTTTCAAAGAAACGGTCAAAAAAGTAAAGAAATCATT\n+TTTTTTTAACATCACTGAAGCTAGAAACAATCCTTAAAAATGTAACATGG\n+TGTTAGTAGCATTGAAAATTGCTTATAACTGCAAAGGGAAAACAAACATC\n+GGCTTGCCGAATGTAATTTCCATTCTTGTTTGACTTGAGTTTATAACTTA\n+CAATTATGGTAAGGTGCCTGATTTTGGTTTTTGCCATACGGTATGTGGTA\n+GTTATCATTTTGCCGTGCTATAAGTGACATCACAGCCGATTCTTTTACTC\n+CATAATGGGCCAATGTGTTGAGGCGTTTCCATCCATTTATTGTTTTTGTA\n+GTTAGGTCTTCATCTTGTAATGTGAGATGACCTCCTCTTCCATGCCTCCA\n+TTCTAAGTCAACTTCGTGCACAGATGGTCTCATAGAAAACGGAGTATTTT\n+TAAAAATTGCGTCCAAAATTTTTAATTTAACTTGCGATATGGTATCCAAA\n+TCGTTTACACGACATTGGACTTTTTCATCAAGATCATCTTGTAAAATGTG\n+CAAAATTACAACAGAATGGGTAACTTGTTCGTGGAGAAGTCGTTCCTCTG\n+AAAGTGAATAACGAGCATCGTGGGTTATTGCGTCCACCAAACCCTTTTCA\n+ATTTGATGCTTAATTGCCTTGAACAGCAAAAATAAATTAGATCCAGCATA\n+TTCTTTTAGGTAGTCGTACATACAAATTGCTAAGTAGTTTGTTAACATTT\n+TTTCAACTACGCTCTCAGTGCGTCGTAGCATTAGCTGAGGATGCTTGCTG\n+GCGAGCGATTTGTCAATTAATCGCAATAAAAGGGACTTTAAAATTTCCGT\n+CGCATATTCCATTTTGTTCATTAGAACAACCATAAGTAAAGAGGCGACGT\n+TAACTCGATCGCGAATTGAGAAAGATGACCGTTGAGCTTCTAAAGTTTCT\n+ATGAACAATAGTAAAAAATATTTGTTTCCAATAAGTTGCTCGAATTGTAT\n+CATAGCTGCATCATAGTTAGTGTGCGGGCTACTTCCACAAAATTTTCGGG\n+AGTTTAGAATAGGATGATCTGATACACCGGGAAAGAAAACTTTCATAATG\n+TAATTGACGTGATCTAACGTTGGTATACCGGTGCTCTCCAAATCTGCTGT\n+TAGATCGGTCATGTCCGTTTGGAGCTCAGCAAATGCCTGTTTACACTCAG\n+AACGAACGTTGCTTTCCAATGTTATCATCTGTATCTGAATTCGTTTGTAT\n+TCCCGTTCTGCTTGGGTCGATTTCCTTCTAAATATTATTAGTACGACAAC\n+CAAAACGATGACAAGCACTGCAACTGTCAATATAACGACAAACATGGCAT\n+GTGAAAAAACATAGGGTTTATTTAAATCATATTTCAAATATCCTATGGCG\n+AAAC
GAAGATTTCGCCCTACTTTAACCACAACTAGAGGTAAATCTGTCGA\n+TTGATCCACACCATTTTCATCAGTTGGGAGTGGTTGATGTTCCGGTGGAA\n+TGCACAAAAGTTGAGTTAGTGTAAGGCTTGTTATATTGCATTGAGTAGTA\n+CCAATGGTTACATTAACGTCGTATTCATCAGCTGCCAAATTTAGTAGCTC\n+GCCTTCAATAACCAAGCTGTCACCCTTGTATAGTTTAATTCCGTCATTTG\n+GAAATGGTAAATATTTTGGATCGGCTAAATAAACAATAGTGCTCCTGATA\n+TCATGAAAGTATTTATTTAGATCTCGAACAAGCTGTACATTGTCCATTAC\n+AAAACTAAGTTGTAAATTCAGTTGCGTTTCATGGACCTTTACAAAAGTTG\n+TAACATCCATATTATTATTGACGAAATAGCTAGGCGCAGTTCCCCCTGTT\n+GTGTATATATGAAAGTTGTCTACGAAGTCTGCTTTTCGCTTTCGTCTAGA\n+ATATTCGTTCTGATGCGCACTTTCATTTTTAAAGTCAAATTCCGAATCCA\n+TCACTCTGTTAGAACTTTTAAATGCTTGAAATTTATCATTTACCGGAGGA\n+GATGGGCATTCCATTTGATTTGAATTGATTACTACGCAAGATGTTTTGTT\n+TACTCTTTCATTATCAAAAAAAACTTCAAGTTCAGGCTTTTGAATTGAAT\n+TAAGATACATTCCATGAACGGTCAGAACGCGACCACCACTTACAAAACTG\n+CGTAAAGGCTTAATCTGCATTATACGCGGGTCTTGGGTATAGTTAAAAAT\n+AGAACAGGGCTGCCTTGGTAACGTACGAAATTGGTATGAACCGAAATTAC\n+TTCGAGTTAGATTTTTATTGGGTATAGTTGCGGGTGAGATATGACATTCT\n+AAAGTTCGGTTGGCACCGTCAATAAGAAGGTGTAGAGACCGTATTGGTTC\n+TGGCTGGGTCGCCTCTGATGTAGTACTACTAACTTGAGTTGAAGAAGCTT\n+GTGTAACGTTTATATGACACTCGTACTCATCTAAATATGCTCGCATGGTT\n+GATCCAATGTTCAAAAACTTTCCTATTAACGACAATTGTGTTCCTCCCGA\n+CCTGGGCCCAATCGTGGGATATAATCCTGTTAGCAAAACATTCTTAAAGT\n+GAAATTGTACACTAGATTCAGTATACCCTGCATCATTTGCAACCTTTATA\n+GGCGCTGACATTTCATACATCACTGCTCCAGTGCGGCATTCGATCTTTAC\n+AGAAATTTGGTAGTTGACTAGTTCACAAGGCACAGACCCAATAAATATTT\n+TTCCACGCACATCTCCTTCGCGAATGCCCAAATTACTGCCTTCAATTGTA\n+ATAAGGGTACCACCCTCAATAGGTCCAGATAACGGTTTAATAATATCGAT\n+CCTTGGTAAAGGGCACTCGTTTTCTTTAGTTGTTTTTGATCCTGCACTAA\n+TAGCACGATCACTGTCTGCTATGCAAGTCTCGTTATATACACATGAGTTG\n+CTGCACCAAGCACATTGGTATTTTGGATCACGAGTGACACACAAACTGCA\n+ATCAGCATGATCCCGATGTGAGCCCAGTACGTCGCATTTATATAATGTAA\n+CAATCGCCGTGTCCACATAGTGCTGGAAATTCCATGTAATTACAACCTTT\n+GCTTGGTATTCATGTGTATTAGTCTCGTAGAAATAAGGTGTTTTTTCACA\n+AACAACAATCTTGTTTGACTCAATGTGGGCAGGCAATAGCATCTGAGCAG\n+CTTCAATATGAACTGTACACAAGAATCCAGCGTGAGCGCTTTTGGGTTTT\n+GGTAAGTTTTCTATCTCTAAACGAATTTCTTTTGGCACCCGCACCGGTAA\n+AAGAATCGCCGGACGATTGCTTTTTAAATGGGGGCATTGACCA
ACACTAG\n+TTACCGAATTTTCTATATTACGACATTGTATTGATTGATGGACACATTTA\n+TTGTCAAAAATACACC'..b'CCACTGGACCCGTTGGATAGCGTC\n+GGCACTTTTCTGTTTGGTGCCATTCACTGGTCACTAGTAGGACCGCTCTG\n+CCTATTAAAATAGGAGTGCTGCTTAGAATTTATTTCAGCAAGACAGCCGT\n+AAGATATATATGAATAGTATTTAACGGCAGTGGCAGAGGCCTTTACAATA\n+TAAGGCTTTACGGGATTTTAAACAATGAAAAACAAAAATATTTTTATAGA\n+GTCGAGATAAAATACTTTTTACAATAAATACTAAGTATTTTTAAAAGATA\n+AAAATATTTAAAAGTAAATACTGAATATTTTAACAGGTGTTAAGTACTGG\n+GAACTGTGACTAAGGACGGCGGTATATCGCTACCATAATATTTTTAAAGG\n+AACTGAATACTGAAAGATGTGACCAAAATGGTAGAATATCGCTAACGAAA\n+ATCACAGACGCAAATATCGATAGTGGCCTAGAGACCCGATATCGCCGCCG\n+TGATTATCGATGAAACTACAGCTGAGCTGCTTGTTTACATTCTAAAGTTA\n+AAATTTTTAAAAATTTCCAATACAAAAAAAAATTCTTCGTATTAACACGG\n+CTGTTTGCCGGTAAGATCTGAAGTGGATTCAAGTTGCAGCACCCTCAGGA\n+AAAGAATAAATTATTATAATATAATAATATAATATAAAGAATTTAAATTC\n+GAACCAAACGCGGTATGTCACTCTTCATTTAGAGAATTTGTGGCTTTGTT\n+TTGTACAAAAAACTATTCTTTTTTCTTGAAAATTAGCATCAAGAATTTAT\n+TGCCAAAACAGAACGCAATTCTGCTAAATCTGGTTACCCTGCCAAATGGC\n+TGGCCGTTCTGTGCATTCAATAACACAAGCTAACGGCTTACGGCACATTC\n+GGAAAGATCCAAAAGAATTTCTATTCCGGCATTTTTTATGTATTATCGTC\n+CAATTTTTGTATGTGTATTCATTCTTTTGGCTGTCCGTTATTAAGCTTTT\n+CTTATCCACATAAGCTGACCAGCTGCCAAAAAGCCGACCGTTTTGTGCAG\n+CCAATAACCCAAGCTAACTATCAACAGCAATTTCGAAAAATCCAGAAGAA\n+GCTATCCTTCGGCAATTTTTTCTTATATTATAATTTTTTTGTCACAATTT\n+TTTGTCAAAAAATCGACAATAATTAAGTGTTTACATTTTAATGCGATTTA\n+ATTGGAAATTTGAATTCGAATCAAACGCGGTGTCACTCTACATATGGACA\n+ATTTGTGGCTACAAAATTAGTTGAAAGTGCATGTTTTTTTTTAAGTGAAA\n+TAAAACAAAAACAGCAATAACAGGCCAAAATGTTGTTTTATAAATAAATA\n+CTTACATATCCGCATATAAATATGAATGAAAAAAAAAACAGCAAGACTGG\n+GCCAAAATATTGTTCTTTAATATGTAAATAAATACATATCTACATATACA\n+TATGTGGACAAAAGTGTTAAAATTAGTTAATAGTGCATGTTTTTTAAATG\n+AAAAAATAAAACAAAAAAAGCAAGCAAATATAAATTATATTCCACACACA\n+TTTACAATAATTTCAAACTACGCTAAAAATACCCAACAGAAAAAACATAG\n+AAAGGTAAACAAAATACAAGAATATCAAAAAAAATCTAATTTTCACGTTG\n+TTGTGGCCGGTACCACCGGTCATACATACATATAGATACATATTTAGATT\n+CTCACAAAAAGAATAGAAAATTACCGCTAATTTCCTTATATACGTTTTTG\n+TTTTCAGTGTTTTGAAATAGTATTTTTCAAACAAAAAACAATACAAAAGG\n+GTTAAAAAAATTGTTCGATTGGCTTTAACATACT
AATTAAAAAAAATCAC\n+AATTTTTTGGCAAAATATCGAAAAAAATTAAGTGTTTACATTTTTATGCG\n+AATTATTTGGAAATTTAAATAAGAACCCAACGCGGTTTGTCACTCTACAT\n+ATGGACAATTTGTGGCTTTGTTTTGAACAAAAAATGAATTTTTTTTTACT\n+CAAAATTAACAAGAAGAAACAGTAATGCGGGTTTTGATCGCAATTCTGCT\n+AAATCGAATTTCCGTGCCCAAAGGCTGACCTTTTTGTGCAGCAAATAGCC\n+CAAGCTAATAATCTACAGCAATTTCTTAAAGACCGAAAATGTATCAACTT\n+CGGCAAAATATGAAAATTTAACATTTATTTTTCAACATTTTTTGCCAAAT\n+AATCGACAAAAATTTCAGTGTCCCGATTTGGATGCGAATTAATTGAGGAC\n+AATTGTGACTTTGTTTTGTAAAAAAACTATTTTTTTTACTGAAAATTACA\n+ATCAAGAATCTAAGGCCAAATCAGAATATTTTCACTGCGGTTTTTTGATC\n+GCAATTCTGCTAAATCGGGTTCCGTGCCAAAAAGCTGACTATTTTTTTTA\n+CTGAGAATTAGCATCAAGAATTTATTGCCAAAACAGAACGCAATTCTGCT\n+AAATCGGGTTGCCCTGCCAAAAGGCTGGCCGTTTTGCGCATTCAATAACA\n+CAAGCTAACGGTTTACAGCACATTCGGAAAGATCCAAAAGAATTTCTATT\n+TCGGCATTTTTTATGTATTATCGTCTAATTTTGGTATGTGTATTCATTCT\n+TTTGGCTGTCCGTTATTAAGCTGCTTTTCTTATTCACATATTCAATGGCG\n+CAGTACATAAAAAGAATGAACCGTGGTTTTCTTGCCCTCTCCATCTCCCT\n+CTACCACTTCCCCTCTCAACAACTATCTCACGCACTCTGTCCCTTCTCGT\n+CTCTGTCACCCTCTCTTTACCTACCTCACTCTGGCACACTCTCTCTACCT\n+ATCTCACTCTGTCACCTTCTTCTACCTAATTCTCTCTGTCGCGCTCGCTC\n+TCTCAACATGCCTTTCGCACCGTATCTCTTCGTTTCTCTGTATGCTTGCT\n+CTCTTAATTTAAGTTTCTCTTTCTTTGTGTTCAATATCTTTCGCGCTCGT\n+GACTCCTATTGACCGGACTCTGAGCGGTGCTCATTAAGAGTGGAGTTTTC\n+GATGATGGCGGGTGGCGCGAACGAGAATTTTTTACATAAAGGTAAGTTGC\n+CAATTCTTTTTTAAATGTGACATACAGCATGCATATTTTATTTATTGTAA\n+TTATATGTGAAAGAATAAAATTATATGTACTTTTATCTAATCTATTGCAT\n+CTATTTTTTCACAGGTAAACGAGTCACACGAGAAGTAAAACACAAACACA\n+AGGATCAAAATAAAATTCAGGTGAGTGAACGAACGTGTTGAAAGTAGTTA\n+ATAGTGCATGTCTATTTAAGTGTAAGAAACAAAAAAGACACAAATGGGCC\n+AATAATTCGTTTCTTTAATAGATAACTGCCTATATTATGTTCAAACTATG\n+CTAAAAAGACCCAAAAGGAAAACATAAAGTATATACCTTGCAAAATGAAT\n+AAAAAGAAATCATATATATTCATATACGTTTGACCGGTACCACCGGTCAT\n+ACATACATAAACATAAACACAAAAAGACTAGAATATAAATACCATATTTT\n+TGATTGGCTTAAAAATGGCACAAAAAAAAGATAAAAACTAGAGTGTCTAG\n+CTTTAAGTGACAATCGACTCGGAAAATGATCAAGAAATAAATGAAAAATT\n+TGTCTTGCGGCTTTTTGGTCGCAATTCTAAATCGGTTTGTATGTATGTCA\n+CTCTTCATGTATACAATTTGGTGGCTTTGTATTCCACAAAAGTCACAGTA\n+GTCACAAAAGTTTTTTTTAC
TGAAAATTACCATCAAGAGTCTATGGCCAA\n+ATCAGAATATTTTCACTGCGGTTTTTTATTTCAATTCTGCTAAATCGGGT\n+TTCCGTGCCAAAAAGCTTTTGCAGCCAACAACAATATTCAATTTTTTGGC\n+AAAAAATCGACAAAAAAATT\n' |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/augustus/output/augustusDbia3.bb |
b |
Binary file test-data/augustus/output/augustusDbia3.bb has changed |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/augustus/workflowInputs/dbia3.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/augustus/workflowInputs/dbia3.fa Wed Jul 13 13:36:37 2016 -0400 |
b |
b'@@ -0,0 +1,66784 @@\n+>contig1\n+AAAACTAATTTTATCAAAATCGGACAACTATATCATATAGCTGCCATACG\n+AACGATCGGAAAATTGGTAAGTAAATAATTAAAAATATTATATCTTTGGT\n+GTGTTTTTGGAAATAACATTTTTTAATATGTTTTGAATTTTGAATTAAAT\n+TCTATCAAAATCGGACGACTATATCATATAGCTGCCATACAAACGATCGG\n+AAAATTGGTGAATAAATAATATGAAAAAATTATATCTTTGGTTATTTTTT\n+AGACAAATAACCTCCAACGCTTGGAAATAACATTTTTTAATTAGTTATAA\n+ATTTGATATTTTAATTTTATCAAAATCGGACGAATATAGCATATAGCTGT\n+TAAAATAATATGAAACAAATTATAGCTCCGGTGTTTTTTACATATTATCT\n+TATACTATTGGGAAAATAGTTTATTATATTTTAAAGAATTTCCAATTAAA\n+CTCTAACATATAGCTTTCAAAGAAACGGTCAAAAAAGTAAAGAAATCATT\n+TTTTTTTAACATCACTGAAGCTAGAAACAATCCTTAAAAATGTAACATGG\n+TGTTAGTAGCATTGAAAATTGCTTATAACTGCAAAGGGAAAACAAACATC\n+GGCTTGCCGAATGTAATTTCCATTCTTGTTTGACTTGAGTTTATAACTTA\n+CAATTATGGTAAGGTGCCTGATTTTGGTTTTTGCCATACGGTATGTGGTA\n+GTTATCATTTTGCCGTGCTATAAGTGACATCACAGCCGATTCTTTTACTC\n+CATAATGGGCCAATGTGTTGAGGCGTTTCCATCCATTTATTGTTTTTGTA\n+GTTAGGTCTTCATCTTGTAATGTGAGATGACCTCCTCTTCCATGCCTCCA\n+TTCTAAGTCAACTTCGTGCACAGATGGTCTCATAGAAAACGGAGTATTTT\n+TAAAAATTGCGTCCAAAATTTTTAATTTAACTTGCGATATGGTATCCAAA\n+TCGTTTACACGACATTGGACTTTTTCATCAAGATCATCTTGTAAAATGTG\n+CAAAATTACAACAGAATGGGTAACTTGTTCGTGGAGAAGTCGTTCCTCTG\n+AAAGTGAATAACGAGCATCGTGGGTTATTGCGTCCACCAAACCCTTTTCA\n+ATTTGATGCTTAATTGCCTTGAACAGCAAAAATAAATTAGATCCAGCATA\n+TTCTTTTAGGTAGTCGTACATACAAATTGCTAAGTAGTTTGTTAACATTT\n+TTTCAACTACGCTCTCAGTGCGTCGTAGCATTAGCTGAGGATGCTTGCTG\n+GCGAGCGATTTGTCAATTAATCGCAATAAAAGGGACTTTAAAATTTCCGT\n+CGCATATTCCATTTTGTTCATTAGAACAACCATAAGTAAAGAGGCGACGT\n+TAACTCGATCGCGAATTGAGAAAGATGACCGTTGAGCTTCTAAAGTTTCT\n+ATGAACAATAGTAAAAAATATTTGTTTCCAATAAGTTGCTCGAATTGTAT\n+CATAGCTGCATCATAGTTAGTGTGCGGGCTACTTCCACAAAATTTTCGGG\n+AGTTTAGAATAGGATGATCTGATACACCGGGAAAGAAAACTTTCATAATG\n+TAATTGACGTGATCTAACGTTGGTATACCGGTGCTCTCCAAATCTGCTGT\n+TAGATCGGTCATGTCCGTTTGGAGCTCAGCAAATGCCTGTTTACACTCAG\n+AACGAACGTTGCTTTCCAATGTTATCATCTGTATCTGAATTCGTTTGTAT\n+TCCCGTTCTGCTTGGGTCGATTTCCTTCTAAATATTATTAGTACGACAAC\n+CAAAACGATGACAAGCACTGCAACTGTCAATATAACGACAAACATGGCAT\n+GTGAAAAAACATAGGGTTTATTTAAATCATATTTCAAATATCCTATGGCG\n+AAAC
GAAGATTTCGCCCTACTTTAACCACAACTAGAGGTAAATCTGTCGA\n+TTGATCCACACCATTTTCATCAGTTGGGAGTGGTTGATGTTCCGGTGGAA\n+TGCACAAAAGTTGAGTTAGTGTAAGGCTTGTTATATTGCATTGAGTAGTA\n+CCAATGGTTACATTAACGTCGTATTCATCAGCTGCCAAATTTAGTAGCTC\n+GCCTTCAATAACCAAGCTGTCACCCTTGTATAGTTTAATTCCGTCATTTG\n+GAAATGGTAAATATTTTGGATCGGCTAAATAAACAATAGTGCTCCTGATA\n+TCATGAAAGTATTTATTTAGATCTCGAACAAGCTGTACATTGTCCATTAC\n+AAAACTAAGTTGTAAATTCAGTTGCGTTTCATGGACCTTTACAAAAGTTG\n+TAACATCCATATTATTATTGACGAAATAGCTAGGCGCAGTTCCCCCTGTT\n+GTGTATATATGAAAGTTGTCTACGAAGTCTGCTTTTCGCTTTCGTCTAGA\n+ATATTCGTTCTGATGCGCACTTTCATTTTTAAAGTCAAATTCCGAATCCA\n+TCACTCTGTTAGAACTTTTAAATGCTTGAAATTTATCATTTACCGGAGGA\n+GATGGGCATTCCATTTGATTTGAATTGATTACTACGCAAGATGTTTTGTT\n+TACTCTTTCATTATCAAAAAAAACTTCAAGTTCAGGCTTTTGAATTGAAT\n+TAAGATACATTCCATGAACGGTCAGAACGCGACCACCACTTACAAAACTG\n+CGTAAAGGCTTAATCTGCATTATACGCGGGTCTTGGGTATAGTTAAAAAT\n+AGAACAGGGCTGCCTTGGTAACGTACGAAATTGGTATGAACCGAAATTAC\n+TTCGAGTTAGATTTTTATTGGGTATAGTTGCGGGTGAGATATGACATTCT\n+AAAGTTCGGTTGGCACCGTCAATAAGAAGGTGTAGAGACCGTATTGGTTC\n+TGGCTGGGTCGCCTCTGATGTAGTACTACTAACTTGAGTTGAAGAAGCTT\n+GTGTAACGTTTATATGACACTCGTACTCATCTAAATATGCTCGCATGGTT\n+GATCCAATGTTCAAAAACTTTCCTATTAACGACAATTGTGTTCCTCCCGA\n+CCTGGGCCCAATCGTGGGATATAATCCTGTTAGCAAAACATTCTTAAAGT\n+GAAATTGTACACTAGATTCAGTATACCCTGCATCATTTGCAACCTTTATA\n+GGCGCTGACATTTCATACATCACTGCTCCAGTGCGGCATTCGATCTTTAC\n+AGAAATTTGGTAGTTGACTAGTTCACAAGGCACAGACCCAATAAATATTT\n+TTCCACGCACATCTCCTTCGCGAATGCCCAAATTACTGCCTTCAATTGTA\n+ATAAGGGTACCACCCTCAATAGGTCCAGATAACGGTTTAATAATATCGAT\n+CCTTGGTAAAGGGCACTCGTTTTCTTTAGTTGTTTTTGATCCTGCACTAA\n+TAGCACGATCACTGTCTGCTATGCAAGTCTCGTTATATACACATGAGTTG\n+CTGCACCAAGCACATTGGTATTTTGGATCACGAGTGACACACAAACTGCA\n+ATCAGCATGATCCCGATGTGAGCCCAGTACGTCGCATTTATATAATGTAA\n+CAATCGCCGTGTCCACATAGTGCTGGAAATTCCATGTAATTACAACCTTT\n+GCTTGGTATTCATGTGTATTAGTCTCGTAGAAATAAGGTGTTTTTTCACA\n+AACAACAATCTTGTTTGACTCAATGTGGGCAGGCAATAGCATCTGAGCAG\n+CTTCAATATGAACTGTACACAAGAATCCAGCGTGAGCGCTTTTGGGTTTT\n+GGTAAGTTTTCTATCTCTAAACGAATTTCTTTTGGCACCCGCACCGGTAA\n+AAGAATCGCCGGACGATTGCTTTTTAAATGGGGGCATTGACCA
ACACTAG\n+TTACCGAATTTTCTATATTACGACATTGTATTGATTGATGGACACATTTA\n+TTGTCAAAAATACACC'..b'CCACTGGACCCGTTGGATAGCGTC\n+GGCACTTTTCTGTTTGGTGCCATTCACTGGTCACTAGTAGGACCGCTCTG\n+CCTATTAAAATAGGAGTGCTGCTTAGAATTTATTTCAGCAAGACAGCCGT\n+AAGATATATATGAATAGTATTTAACGGCAGTGGCAGAGGCCTTTACAATA\n+TAAGGCTTTACGGGATTTTAAACAATGAAAAACAAAAATATTTTTATAGA\n+GTCGAGATAAAATACTTTTTACAATAAATACTAAGTATTTTTAAAAGATA\n+AAAATATTTAAAAGTAAATACTGAATATTTTAACAGGTGTTAAGTACTGG\n+GAACTGTGACTAAGGACGGCGGTATATCGCTACCATAATATTTTTAAAGG\n+AACTGAATACTGAAAGATGTGACCAAAATGGTAGAATATCGCTAACGAAA\n+ATCACAGACGCAAATATCGATAGTGGCCTAGAGACCCGATATCGCCGCCG\n+TGATTATCGATGAAACTACAGCTGAGCTGCTTGTTTACATTCTAAAGTTA\n+AAATTTTTAAAAATTTCCAATACAAAAAAAAATTCTTCGTATTAACACGG\n+CTGTTTGCCGGTAAGATCTGAAGTGGATTCAAGTTGCAGCACCCTCAGGA\n+AAAGAATAAATTATTATAATATAATAATATAATATAAAGAATTTAAATTC\n+GAACCAAACGCGGTATGTCACTCTTCATTTAGAGAATTTGTGGCTTTGTT\n+TTGTACAAAAAACTATTCTTTTTTCTTGAAAATTAGCATCAAGAATTTAT\n+TGCCAAAACAGAACGCAATTCTGCTAAATCTGGTTACCCTGCCAAATGGC\n+TGGCCGTTCTGTGCATTCAATAACACAAGCTAACGGCTTACGGCACATTC\n+GGAAAGATCCAAAAGAATTTCTATTCCGGCATTTTTTATGTATTATCGTC\n+CAATTTTTGTATGTGTATTCATTCTTTTGGCTGTCCGTTATTAAGCTTTT\n+CTTATCCACATAAGCTGACCAGCTGCCAAAAAGCCGACCGTTTTGTGCAG\n+CCAATAACCCAAGCTAACTATCAACAGCAATTTCGAAAAATCCAGAAGAA\n+GCTATCCTTCGGCAATTTTTTCTTATATTATAATTTTTTTGTCACAATTT\n+TTTGTCAAAAAATCGACAATAATTAAGTGTTTACATTTTAATGCGATTTA\n+ATTGGAAATTTGAATTCGAATCAAACGCGGTGTCACTCTACATATGGACA\n+ATTTGTGGCTACAAAATTAGTTGAAAGTGCATGTTTTTTTTTAAGTGAAA\n+TAAAACAAAAACAGCAATAACAGGCCAAAATGTTGTTTTATAAATAAATA\n+CTTACATATCCGCATATAAATATGAATGAAAAAAAAAACAGCAAGACTGG\n+GCCAAAATATTGTTCTTTAATATGTAAATAAATACATATCTACATATACA\n+TATGTGGACAAAAGTGTTAAAATTAGTTAATAGTGCATGTTTTTTAAATG\n+AAAAAATAAAACAAAAAAAGCAAGCAAATATAAATTATATTCCACACACA\n+TTTACAATAATTTCAAACTACGCTAAAAATACCCAACAGAAAAAACATAG\n+AAAGGTAAACAAAATACAAGAATATCAAAAAAAATCTAATTTTCACGTTG\n+TTGTGGCCGGTACCACCGGTCATACATACATATAGATACATATTTAGATT\n+CTCACAAAAAGAATAGAAAATTACCGCTAATTTCCTTATATACGTTTTTG\n+TTTTCAGTGTTTTGAAATAGTATTTTTCAAACAAAAAACAATACAAAAGG\n+GTTAAAAAAATTGTTCGATTGGCTTTAACATACT
AATTAAAAAAAATCAC\n+AATTTTTTGGCAAAATATCGAAAAAAATTAAGTGTTTACATTTTTATGCG\n+AATTATTTGGAAATTTAAATAAGAACCCAACGCGGTTTGTCACTCTACAT\n+ATGGACAATTTGTGGCTTTGTTTTGAACAAAAAATGAATTTTTTTTTACT\n+CAAAATTAACAAGAAGAAACAGTAATGCGGGTTTTGATCGCAATTCTGCT\n+AAATCGAATTTCCGTGCCCAAAGGCTGACCTTTTTGTGCAGCAAATAGCC\n+CAAGCTAATAATCTACAGCAATTTCTTAAAGACCGAAAATGTATCAACTT\n+CGGCAAAATATGAAAATTTAACATTTATTTTTCAACATTTTTTGCCAAAT\n+AATCGACAAAAATTTCAGTGTCCCGATTTGGATGCGAATTAATTGAGGAC\n+AATTGTGACTTTGTTTTGTAAAAAAACTATTTTTTTTACTGAAAATTACA\n+ATCAAGAATCTAAGGCCAAATCAGAATATTTTCACTGCGGTTTTTTGATC\n+GCAATTCTGCTAAATCGGGTTCCGTGCCAAAAAGCTGACTATTTTTTTTA\n+CTGAGAATTAGCATCAAGAATTTATTGCCAAAACAGAACGCAATTCTGCT\n+AAATCGGGTTGCCCTGCCAAAAGGCTGGCCGTTTTGCGCATTCAATAACA\n+CAAGCTAACGGTTTACAGCACATTCGGAAAGATCCAAAAGAATTTCTATT\n+TCGGCATTTTTTATGTATTATCGTCTAATTTTGGTATGTGTATTCATTCT\n+TTTGGCTGTCCGTTATTAAGCTGCTTTTCTTATTCACATATTCAATGGCG\n+CAGTACATAAAAAGAATGAACCGTGGTTTTCTTGCCCTCTCCATCTCCCT\n+CTACCACTTCCCCTCTCAACAACTATCTCACGCACTCTGTCCCTTCTCGT\n+CTCTGTCACCCTCTCTTTACCTACCTCACTCTGGCACACTCTCTCTACCT\n+ATCTCACTCTGTCACCTTCTTCTACCTAATTCTCTCTGTCGCGCTCGCTC\n+TCTCAACATGCCTTTCGCACCGTATCTCTTCGTTTCTCTGTATGCTTGCT\n+CTCTTAATTTAAGTTTCTCTTTCTTTGTGTTCAATATCTTTCGCGCTCGT\n+GACTCCTATTGACCGGACTCTGAGCGGTGCTCATTAAGAGTGGAGTTTTC\n+GATGATGGCGGGTGGCGCGAACGAGAATTTTTTACATAAAGGTAAGTTGC\n+CAATTCTTTTTTAAATGTGACATACAGCATGCATATTTTATTTATTGTAA\n+TTATATGTGAAAGAATAAAATTATATGTACTTTTATCTAATCTATTGCAT\n+CTATTTTTTCACAGGTAAACGAGTCACACGAGAAGTAAAACACAAACACA\n+AGGATCAAAATAAAATTCAGGTGAGTGAACGAACGTGTTGAAAGTAGTTA\n+ATAGTGCATGTCTATTTAAGTGTAAGAAACAAAAAAGACACAAATGGGCC\n+AATAATTCGTTTCTTTAATAGATAACTGCCTATATTATGTTCAAACTATG\n+CTAAAAAGACCCAAAAGGAAAACATAAAGTATATACCTTGCAAAATGAAT\n+AAAAAGAAATCATATATATTCATATACGTTTGACCGGTACCACCGGTCAT\n+ACATACATAAACATAAACACAAAAAGACTAGAATATAAATACCATATTTT\n+TGATTGGCTTAAAAATGGCACAAAAAAAAGATAAAAACTAGAGTGTCTAG\n+CTTTAAGTGACAATCGACTCGGAAAATGATCAAGAAATAAATGAAAAATT\n+TGTCTTGCGGCTTTTTGGTCGCAATTCTAAATCGGTTTGTATGTATGTCA\n+CTCTTCATGTATACAATTTGGTGGCTTTGTATTCCACAAAAGTCACAGTA\n+GTCACAAAAGTTTTTTTTAC
TGAAAATTACCATCAAGAGTCTATGGCCAA\n+ATCAGAATATTTTCACTGCGGTTTTTTATTTCAATTCTGCTAAATCGGGT\n+TTCCGTGCCAAAAAGCTTTTGCAGCCAACAACAATATTCAATTTTTTGGC\n+AAAAAATCGACAAAAAAATT\n' |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/augustusDbia3.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/augustusDbia3.gff3 Wed Jul 13 13:36:37 2016 -0400 |
[ |
b'@@ -0,0 +1,9513 @@\n+##gff-version 3\n+# This output was generated with AUGUSTUS (version 3.1.0).\n+# AUGUSTUS is a gene prediction tool written by Mario Stanke (mario.stanke@uni-greifswald.de),\n+# Oliver Keller, Stefanie K\xc3\xb6nig and Lizzy Gerischer.\n+# Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008),\n+# Using native and syntenically mapped cDNA alignments to improve de novo gene finding\n+# Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013\n+# No extrinsic information on sequences given.\n+# Initialising the parameters using config directory /home/galaxy/galaxy/dependency_dir/augustus/3.1/iuc/package_augustus_3_1/24009970003a/config/ ...\n+# human version. Using default transition matrix.\n+# Looks like /home/galaxy/galaxy/database/files/000/dataset_2.dat is in fasta format.\n+# We have hints for 0 sequences and for 0 of the sequences in the input set.\n+#\n+# ----- prediction on sequence number 1 (length = 45179, name = contig1) -----\n+#\n+# Predicted genes for sequence number 1 on both strands\n+# start gene contig1.g1\n+contig1\tAUGUSTUS\tgene\t641\t23169\t1\t-\t.\tID=contig1.g1\n+contig1\tAUGUSTUS\ttranscript\t641\t23169\t.\t-\t.\tID=contig1.g1.t1;Parent=contig1.g1\n+contig1\tAUGUSTUS\tCDS\t641\t5409\t.\t-\t2\tID=contig1.g1.t1.cds;Parent=contig1.g1.t1\n+contig1\tAUGUSTUS\tCDS\t13455\t13545\t.\t-\t0\tID=contig1.g1.t1.cds;Parent=contig1.g1.t1\n+contig1\tAUGUSTUS\tCDS\t17461\t17543\t.\t-\t2\tID=contig1.g1.t1.cds;Parent=contig1.g1.t1\n+contig1\tAUGUSTUS\tCDS\t23019\t23169\t.\t-\t0\tID=contig1.g1.t1.cds;Parent=contig1.g1.t1\n+contig1\tAUGUSTUS\tstart_codon\t23167\t23169\t.\t-\t0\tParent=contig1.g1.t1\n+# coding sequence = [atgcacattcagttgtctactctcgcctttcaggttcagactgcagatgagttcgtcgtcgatgcgcgtgatagacgat\n+# ccttcatccagcgcaggtgtcgacctgcgaattctctccgtacagcgtcacgggaagaatccggaataggagctgtaggtgttgctgataaatgtagt\n+# gcggatgcagaagttttaccgctgcattttgttggtgcgccaggttcgaaactgatgtcgagtcatgcaattgcttttcctataagtagccgagaatt\n+# 
gaacttacagatatctctaagatttttaatatttgcttacgaatacgctagcgcatgcgaaatttacagccttccgcggtttccatcaacgcctcaat\n+# ttgtcgcagttccattggcggctaatgatgaaaatgcatcaacatatgcatttgtaggtcctgcacgatattcatggaaagaagaggatattttatat\n+# gtgggaacgacgttcacgaacgttggtgattatcgccatgacgttcctgccatttcgtcccgtcggcttgatgatttaaactacgcagagttttcaat\n+# acagcagtcaattataaatatcgatgtcaaatatcgggatcatttcttagtcgattatgtttatggctttaactcttctgaatatgcgtactttgtta\n+# ttgttcaaaaaaaatcacatttagctgatgaggcaggttatgtaacccgtttggctcgaatatgtattacagatcccaattatgacagttatactgaa\n+# ataacagttcagtgtacggccactgaaaatcatattgactacaatatactacgcgatgccaaagtaactccggcaagccaaaaattagctcagaaaat\n+# gggtataaaaaaggacgatcacgtgttagtaactgttttttcgccctcgaaagagataagcgatcagccagaaagcaaatcggctatgtgcatatata\n+# gcataaaagacattgaggacatgtttattgaaaatattcatctgtgctttaacggaaccataaaggatagaaatttgggttatatatcgggcactatc\n+# aatgacggccggtgcccaatagttggctcgctcggtaacatatacaacttttgttctgtaggacttaagataagcggagtttctcctatcactacaca\n+# cgctctctttcattttgataatgtatcagttacgtcaataactgcaacgtcaacgactgatcagcagcattctcttgcttttcttggaaccgacaagg\n+# gattgataaaaaaagttttattatctggtcagaatccaggtgagtacgaagaaatagttgtggatgctggaaatcggatactaccaaacactatgatg\n+# tcgcccaaaaaagatttcctttacgttttatcgcaacgtaaaataactaaactcagaatcgagcattgttctgtatacacaaattgttcagcttgctt\n+# ggagtctcgggaccctttttgtggatggtgttcattggaaaaacggtgcaccgtgcggtcaacatgtcagcgagatacgtcagcatcgcgatggcttt\n+# ctttgggcagtgggcaacagtgtattgagtttgaatcaattatccctgagaaaataccaattactgatctaacacacctgcacctaataattcgaaca\n+# ctgcccgaaccttttaatgcaaaataccgatgtgtctttggaaactctacccctattgacgccgaaatcctggacaatggactcggatgtgctacccc\n+# cccactagatgaaagaccagtaataccaactaatacagaccatgttttggtgccattgtccgttagaagttcagagacaaataaggactttgtatcaa\n+# gattttttgcattctttgactgttcgcatcatggaaattgccaggaatgtttacaaagttcatggggctgcaactggtgtatttttgacaataaatgt\n+# gtccatcaatcaatacaatgtcgtaatatagaaaattcggtaactagtgttggtcaatgcccccatttaaaaagcaatcgtccggcgattcttttacc\n+# ggtgcgggtgccaaaagaaattcgtttagagatagaaaacttaccaaaacccaaaagcgctcacgctggattcttgtgtacagttcatattgaagctg\n+# 
ctcagatgctattgcctgcccacattgagtcaaacaagattgttgtttgtgaaaaaacaccttatttctacgagactaatacacatgaataccaagca\n+# aaggttgtaattacatggaatttccagcactatgtggacacggcgattgttaca'..b'=contig70.g235\n+contig70\tAUGUSTUS\ttranscript\t24505\t50605\t.\t+\t.\tID=contig70.g235.t1;Parent=contig70.g235\n+contig70\tAUGUSTUS\tstart_codon\t24505\t24507\t.\t+\t0\tParent=contig70.g235.t1\n+contig70\tAUGUSTUS\tCDS\t24505\t24577\t.\t+\t0\tID=contig70.g235.t1.cds;Parent=contig70.g235.t1\n+contig70\tAUGUSTUS\tCDS\t27274\t27411\t.\t+\t2\tID=contig70.g235.t1.cds;Parent=contig70.g235.t1\n+contig70\tAUGUSTUS\tCDS\t28524\t28636\t.\t+\t2\tID=contig70.g235.t1.cds;Parent=contig70.g235.t1\n+contig70\tAUGUSTUS\tCDS\t33015\t33225\t.\t+\t0\tID=contig70.g235.t1.cds;Parent=contig70.g235.t1\n+contig70\tAUGUSTUS\tCDS\t38419\t38560\t.\t+\t2\tID=contig70.g235.t1.cds;Parent=contig70.g235.t1\n+contig70\tAUGUSTUS\tCDS\t45726\t46035\t.\t+\t1\tID=contig70.g235.t1.cds;Parent=contig70.g235.t1\n+contig70\tAUGUSTUS\tCDS\t46098\t46478\t.\t+\t0\tID=contig70.g235.t1.cds;Parent=contig70.g235.t1\n+contig70\tAUGUSTUS\tCDS\t46932\t47068\t.\t+\t0\tID=contig70.g235.t1.cds;Parent=contig70.g235.t1\n+contig70\tAUGUSTUS\tCDS\t47124\t47263\t.\t+\t1\tID=contig70.g235.t1.cds;Parent=contig70.g235.t1\n+contig70\tAUGUSTUS\tCDS\t50403\t50605\t.\t+\t2\tID=contig70.g235.t1.cds;Parent=contig70.g235.t1\n+# coding sequence = [atgattaaaaaaagtttagagacggcagcgcttgtaaattacacccgtctctccaataaagctaagattgaaggcactt\n+# ttccgttcaagggacacgtgccagggaaatcctatcggaatttcaaaatttttccaagcactttcctgtacacgggaaacgtcccagggaaagcctat\n+# cggaaaggtcccagggaaatcccgtcggatattcaggatttgcgaggagaggttattgttccgcccccaaaaaaactagaggacctaattcacttagc\n+# agaactttgtgttgatctgttgcaacaaaatgaagagcactatggagaactgcgcaaacatgacaaaatggataaaattaaaatgcgtaaggaagatg\n+# atgatgtaccaaaaggccacaatgaaagcgatattgatttaaccgccaatactggactcagtagcacatcagacctggcttctgcagcatcaactaat\n+# ggatcgtcatttcgttattgtatgccgacacatgcagtatacaccacgccagtaccaacggcatataatttgaggaatgggcgttttcatcagcacct\n+# 
gcgagacacattcgcaccattagtcgtgcggtacgtggatctgatggaatcttcaatagctcagtcgattcataagggatttgaaaaagaacgctggg\n+# aaagtaaagggaacggatgtgccacctctgaagacttattttggaaactagatgctctacagtcgtttataagagacctgcactggccagacgcagag\n+# tttcgacaacatttagaacagcgtcttaaaatgatggccgtcgatatgatagagcaatgtatacaacgaactgattcgtcttttcagtcgtggctaaa\n+# aaaaaacattgccttcatatcaactgattatattttaccttcagaaatgtgcgctatggtcaatgtgatattagatgctaaaaatcaaagctttaaat\n+# tgactactattgacggcattgatttgtataaatttcatgcaaaaattgacgaccaaatcgacaaagcgaatgtagctatgacacaaggtctaactggt\n+# aaacttatgtcagtgctagagtcgactttgtcaaaattagcacgatacgacgaaggtagcctaatcggctcgattcttagttttacaaatgtatcgag\n+# ctcgggaaaggatctcgggcaaggatatgtaaatttctttagaaataatatggatcaagtacgaggaaaaattggcgacgatttatggaccctgaatt\n+# tctttgagcagtggtactcgcagcagattaacatgctatgtaattggctttcggaacgtttggaccacgctctgcactacgctcaagtttcatctatt\n+# tctcacattatcaagaaaatatattcagacttcgaattacaaggtgtattagaagataaattaaactctaaagcatatcaagcagtcgcacagcgaat\n+# ggcgacagaggaagcgacatgtgctttgacaatgcctgatgttagcgaagatgaaccctgtgacgacattcgagaaggggaagaagaagatactggcg\n+# acgaatctacctctaacataccaaggggcttaccaaaaccaaaaattgctgccgctcaagctgctgctgttaccaacgttgttgccggccgtgtggca\n+# accggaacatcaccagcaggtagtgccacattaatccgccttgaccagcaacaacaagagcgatacagacggcagcaggatcagttgcacgatgagca\n+# gcaagaaagccatgcccgacaaaagcaggaacttgctcttgcatcttacaccccagggacggtcgtcaatggattgggtcaatcatcagtcacggcaa\n+# gttag]\n+# protein sequence = [MIKKSLETAALVNYTRLSNKAKIEGTFPFKGHVPGKSYRNFKIFPSTFLYTGNVPGKAYRKGPREIPSDIQDLRGEVI\n+# VPPPKKLEDLIHLAELCVDLLQQNEEHYGELRKHDKMDKIKMRKEDDDVPKGHNESDIDLTANTGLSSTSDLASAASTNGSSFRYCMPTHAVYTTPVP\n+# TAYNLRNGRFHQHLRDTFAPLVVRYVDLMESSIAQSIHKGFEKERWESKGNGCATSEDLFWKLDALQSFIRDLHWPDAEFRQHLEQRLKMMAVDMIEQ\n+# CIQRTDSSFQSWLKKNIAFISTDYILPSEMCAMVNVILDAKNQSFKLTTIDGIDLYKFHAKIDDQIDKANVAMTQGLTGKLMSVLESTLSKLARYDEG\n+# SLIGSILSFTNVSSSGKDLGQGYVNFFRNNMDQVRGKIGDDLWTLNFFEQWYSQQINMLCNWLSERLDHALHYAQVSSISHIIKKIYSDFELQGVLED\n+# KLNSKAYQAVAQRMATEEATCALTMPDVSEDEPCDDIREGEEEDTGDESTSNIPRGLPKPKIAAAQAAAVTNVVAGRVATGTSPAGSATLIRLDQQQQ\n+# 
ERYRRQQDQLHDEQQESHARQKQELALASYTPGTVVNGLGQSSVTAS]\n+# end gene contig70.g235\n+###\n+# command line:\n+# augustus --strand=both --noInFrameStop=false --gff3=on --uniqueGeneId=true --protein=on --codingseq=on --introns=off --stop=off --stop=off --cds=on --singlestrand=false /home/galaxy/galaxy/database/files/000/dataset_2.dat --UTR=off --genemodel=complete --species=human\n' |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/augustusOutput.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/augustusOutput.html Wed Jul 13 13:36:37 2016 -0400 |
b |
@@ -0,0 +1,1 @@ +<html><body><p>The following generated by Hub Archive Creator:</p><ul><li><a href="./myHub.zip">./myHub.zip</a></li><li><a href="myHub/dbia.html">myHub/dbia.html</a></li><li><a href="myHub/hub.txt">myHub/hub.txt</a></li><li><a href="myHub/genomes.txt">myHub/genomes.txt</a></li><li><a href="myHub/dbia3/dataset_7.2bit">myHub/dbia3/dataset_7.2bit</a></li><li><a href="myHub/dbia3/groups.txt">myHub/dbia3/groups.txt</a></li><li><a href="myHub/dbia3/trackDb.txt">myHub/dbia3/trackDb.txt</a></li><li><a href="myHub/dbia3/description.html">myHub/dbia3/description.html</a></li><li><a href="myHub/dbia3/tracks/augustusDbia3.bb">myHub/dbia3/tracks/augustusDbia3.bb</a></li><ul></body></html> \ No newline at end of file |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/dbia3.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/dbia3.fa Wed Jul 13 13:36:37 2016 -0400 |
b |
b'@@ -0,0 +1,66784 @@\n+>contig1\n+AAAACTAATTTTATCAAAATCGGACAACTATATCATATAGCTGCCATACG\n+AACGATCGGAAAATTGGTAAGTAAATAATTAAAAATATTATATCTTTGGT\n+GTGTTTTTGGAAATAACATTTTTTAATATGTTTTGAATTTTGAATTAAAT\n+TCTATCAAAATCGGACGACTATATCATATAGCTGCCATACAAACGATCGG\n+AAAATTGGTGAATAAATAATATGAAAAAATTATATCTTTGGTTATTTTTT\n+AGACAAATAACCTCCAACGCTTGGAAATAACATTTTTTAATTAGTTATAA\n+ATTTGATATTTTAATTTTATCAAAATCGGACGAATATAGCATATAGCTGT\n+TAAAATAATATGAAACAAATTATAGCTCCGGTGTTTTTTACATATTATCT\n+TATACTATTGGGAAAATAGTTTATTATATTTTAAAGAATTTCCAATTAAA\n+CTCTAACATATAGCTTTCAAAGAAACGGTCAAAAAAGTAAAGAAATCATT\n+TTTTTTTAACATCACTGAAGCTAGAAACAATCCTTAAAAATGTAACATGG\n+TGTTAGTAGCATTGAAAATTGCTTATAACTGCAAAGGGAAAACAAACATC\n+GGCTTGCCGAATGTAATTTCCATTCTTGTTTGACTTGAGTTTATAACTTA\n+CAATTATGGTAAGGTGCCTGATTTTGGTTTTTGCCATACGGTATGTGGTA\n+GTTATCATTTTGCCGTGCTATAAGTGACATCACAGCCGATTCTTTTACTC\n+CATAATGGGCCAATGTGTTGAGGCGTTTCCATCCATTTATTGTTTTTGTA\n+GTTAGGTCTTCATCTTGTAATGTGAGATGACCTCCTCTTCCATGCCTCCA\n+TTCTAAGTCAACTTCGTGCACAGATGGTCTCATAGAAAACGGAGTATTTT\n+TAAAAATTGCGTCCAAAATTTTTAATTTAACTTGCGATATGGTATCCAAA\n+TCGTTTACACGACATTGGACTTTTTCATCAAGATCATCTTGTAAAATGTG\n+CAAAATTACAACAGAATGGGTAACTTGTTCGTGGAGAAGTCGTTCCTCTG\n+AAAGTGAATAACGAGCATCGTGGGTTATTGCGTCCACCAAACCCTTTTCA\n+ATTTGATGCTTAATTGCCTTGAACAGCAAAAATAAATTAGATCCAGCATA\n+TTCTTTTAGGTAGTCGTACATACAAATTGCTAAGTAGTTTGTTAACATTT\n+TTTCAACTACGCTCTCAGTGCGTCGTAGCATTAGCTGAGGATGCTTGCTG\n+GCGAGCGATTTGTCAATTAATCGCAATAAAAGGGACTTTAAAATTTCCGT\n+CGCATATTCCATTTTGTTCATTAGAACAACCATAAGTAAAGAGGCGACGT\n+TAACTCGATCGCGAATTGAGAAAGATGACCGTTGAGCTTCTAAAGTTTCT\n+ATGAACAATAGTAAAAAATATTTGTTTCCAATAAGTTGCTCGAATTGTAT\n+CATAGCTGCATCATAGTTAGTGTGCGGGCTACTTCCACAAAATTTTCGGG\n+AGTTTAGAATAGGATGATCTGATACACCGGGAAAGAAAACTTTCATAATG\n+TAATTGACGTGATCTAACGTTGGTATACCGGTGCTCTCCAAATCTGCTGT\n+TAGATCGGTCATGTCCGTTTGGAGCTCAGCAAATGCCTGTTTACACTCAG\n+AACGAACGTTGCTTTCCAATGTTATCATCTGTATCTGAATTCGTTTGTAT\n+TCCCGTTCTGCTTGGGTCGATTTCCTTCTAAATATTATTAGTACGACAAC\n+CAAAACGATGACAAGCACTGCAACTGTCAATATAACGACAAACATGGCAT\n+GTGAAAAAACATAGGGTTTATTTAAATCATATTTCAAATATCCTATGGCG\n+AAAC
GAAGATTTCGCCCTACTTTAACCACAACTAGAGGTAAATCTGTCGA\n+TTGATCCACACCATTTTCATCAGTTGGGAGTGGTTGATGTTCCGGTGGAA\n+TGCACAAAAGTTGAGTTAGTGTAAGGCTTGTTATATTGCATTGAGTAGTA\n+CCAATGGTTACATTAACGTCGTATTCATCAGCTGCCAAATTTAGTAGCTC\n+GCCTTCAATAACCAAGCTGTCACCCTTGTATAGTTTAATTCCGTCATTTG\n+GAAATGGTAAATATTTTGGATCGGCTAAATAAACAATAGTGCTCCTGATA\n+TCATGAAAGTATTTATTTAGATCTCGAACAAGCTGTACATTGTCCATTAC\n+AAAACTAAGTTGTAAATTCAGTTGCGTTTCATGGACCTTTACAAAAGTTG\n+TAACATCCATATTATTATTGACGAAATAGCTAGGCGCAGTTCCCCCTGTT\n+GTGTATATATGAAAGTTGTCTACGAAGTCTGCTTTTCGCTTTCGTCTAGA\n+ATATTCGTTCTGATGCGCACTTTCATTTTTAAAGTCAAATTCCGAATCCA\n+TCACTCTGTTAGAACTTTTAAATGCTTGAAATTTATCATTTACCGGAGGA\n+GATGGGCATTCCATTTGATTTGAATTGATTACTACGCAAGATGTTTTGTT\n+TACTCTTTCATTATCAAAAAAAACTTCAAGTTCAGGCTTTTGAATTGAAT\n+TAAGATACATTCCATGAACGGTCAGAACGCGACCACCACTTACAAAACTG\n+CGTAAAGGCTTAATCTGCATTATACGCGGGTCTTGGGTATAGTTAAAAAT\n+AGAACAGGGCTGCCTTGGTAACGTACGAAATTGGTATGAACCGAAATTAC\n+TTCGAGTTAGATTTTTATTGGGTATAGTTGCGGGTGAGATATGACATTCT\n+AAAGTTCGGTTGGCACCGTCAATAAGAAGGTGTAGAGACCGTATTGGTTC\n+TGGCTGGGTCGCCTCTGATGTAGTACTACTAACTTGAGTTGAAGAAGCTT\n+GTGTAACGTTTATATGACACTCGTACTCATCTAAATATGCTCGCATGGTT\n+GATCCAATGTTCAAAAACTTTCCTATTAACGACAATTGTGTTCCTCCCGA\n+CCTGGGCCCAATCGTGGGATATAATCCTGTTAGCAAAACATTCTTAAAGT\n+GAAATTGTACACTAGATTCAGTATACCCTGCATCATTTGCAACCTTTATA\n+GGCGCTGACATTTCATACATCACTGCTCCAGTGCGGCATTCGATCTTTAC\n+AGAAATTTGGTAGTTGACTAGTTCACAAGGCACAGACCCAATAAATATTT\n+TTCCACGCACATCTCCTTCGCGAATGCCCAAATTACTGCCTTCAATTGTA\n+ATAAGGGTACCACCCTCAATAGGTCCAGATAACGGTTTAATAATATCGAT\n+CCTTGGTAAAGGGCACTCGTTTTCTTTAGTTGTTTTTGATCCTGCACTAA\n+TAGCACGATCACTGTCTGCTATGCAAGTCTCGTTATATACACATGAGTTG\n+CTGCACCAAGCACATTGGTATTTTGGATCACGAGTGACACACAAACTGCA\n+ATCAGCATGATCCCGATGTGAGCCCAGTACGTCGCATTTATATAATGTAA\n+CAATCGCCGTGTCCACATAGTGCTGGAAATTCCATGTAATTACAACCTTT\n+GCTTGGTATTCATGTGTATTAGTCTCGTAGAAATAAGGTGTTTTTTCACA\n+AACAACAATCTTGTTTGACTCAATGTGGGCAGGCAATAGCATCTGAGCAG\n+CTTCAATATGAACTGTACACAAGAATCCAGCGTGAGCGCTTTTGGGTTTT\n+GGTAAGTTTTCTATCTCTAAACGAATTTCTTTTGGCACCCGCACCGGTAA\n+AAGAATCGCCGGACGATTGCTTTTTAAATGGGGGCATTGACCA
ACACTAG\n+TTACCGAATTTTCTATATTACGACATTGTATTGATTGATGGACACATTTA\n+TTGTCAAAAATACACC'..b'CCACTGGACCCGTTGGATAGCGTC\n+GGCACTTTTCTGTTTGGTGCCATTCACTGGTCACTAGTAGGACCGCTCTG\n+CCTATTAAAATAGGAGTGCTGCTTAGAATTTATTTCAGCAAGACAGCCGT\n+AAGATATATATGAATAGTATTTAACGGCAGTGGCAGAGGCCTTTACAATA\n+TAAGGCTTTACGGGATTTTAAACAATGAAAAACAAAAATATTTTTATAGA\n+GTCGAGATAAAATACTTTTTACAATAAATACTAAGTATTTTTAAAAGATA\n+AAAATATTTAAAAGTAAATACTGAATATTTTAACAGGTGTTAAGTACTGG\n+GAACTGTGACTAAGGACGGCGGTATATCGCTACCATAATATTTTTAAAGG\n+AACTGAATACTGAAAGATGTGACCAAAATGGTAGAATATCGCTAACGAAA\n+ATCACAGACGCAAATATCGATAGTGGCCTAGAGACCCGATATCGCCGCCG\n+TGATTATCGATGAAACTACAGCTGAGCTGCTTGTTTACATTCTAAAGTTA\n+AAATTTTTAAAAATTTCCAATACAAAAAAAAATTCTTCGTATTAACACGG\n+CTGTTTGCCGGTAAGATCTGAAGTGGATTCAAGTTGCAGCACCCTCAGGA\n+AAAGAATAAATTATTATAATATAATAATATAATATAAAGAATTTAAATTC\n+GAACCAAACGCGGTATGTCACTCTTCATTTAGAGAATTTGTGGCTTTGTT\n+TTGTACAAAAAACTATTCTTTTTTCTTGAAAATTAGCATCAAGAATTTAT\n+TGCCAAAACAGAACGCAATTCTGCTAAATCTGGTTACCCTGCCAAATGGC\n+TGGCCGTTCTGTGCATTCAATAACACAAGCTAACGGCTTACGGCACATTC\n+GGAAAGATCCAAAAGAATTTCTATTCCGGCATTTTTTATGTATTATCGTC\n+CAATTTTTGTATGTGTATTCATTCTTTTGGCTGTCCGTTATTAAGCTTTT\n+CTTATCCACATAAGCTGACCAGCTGCCAAAAAGCCGACCGTTTTGTGCAG\n+CCAATAACCCAAGCTAACTATCAACAGCAATTTCGAAAAATCCAGAAGAA\n+GCTATCCTTCGGCAATTTTTTCTTATATTATAATTTTTTTGTCACAATTT\n+TTTGTCAAAAAATCGACAATAATTAAGTGTTTACATTTTAATGCGATTTA\n+ATTGGAAATTTGAATTCGAATCAAACGCGGTGTCACTCTACATATGGACA\n+ATTTGTGGCTACAAAATTAGTTGAAAGTGCATGTTTTTTTTTAAGTGAAA\n+TAAAACAAAAACAGCAATAACAGGCCAAAATGTTGTTTTATAAATAAATA\n+CTTACATATCCGCATATAAATATGAATGAAAAAAAAAACAGCAAGACTGG\n+GCCAAAATATTGTTCTTTAATATGTAAATAAATACATATCTACATATACA\n+TATGTGGACAAAAGTGTTAAAATTAGTTAATAGTGCATGTTTTTTAAATG\n+AAAAAATAAAACAAAAAAAGCAAGCAAATATAAATTATATTCCACACACA\n+TTTACAATAATTTCAAACTACGCTAAAAATACCCAACAGAAAAAACATAG\n+AAAGGTAAACAAAATACAAGAATATCAAAAAAAATCTAATTTTCACGTTG\n+TTGTGGCCGGTACCACCGGTCATACATACATATAGATACATATTTAGATT\n+CTCACAAAAAGAATAGAAAATTACCGCTAATTTCCTTATATACGTTTTTG\n+TTTTCAGTGTTTTGAAATAGTATTTTTCAAACAAAAAACAATACAAAAGG\n+GTTAAAAAAATTGTTCGATTGGCTTTAACATACT
AATTAAAAAAAATCAC\n+AATTTTTTGGCAAAATATCGAAAAAAATTAAGTGTTTACATTTTTATGCG\n+AATTATTTGGAAATTTAAATAAGAACCCAACGCGGTTTGTCACTCTACAT\n+ATGGACAATTTGTGGCTTTGTTTTGAACAAAAAATGAATTTTTTTTTACT\n+CAAAATTAACAAGAAGAAACAGTAATGCGGGTTTTGATCGCAATTCTGCT\n+AAATCGAATTTCCGTGCCCAAAGGCTGACCTTTTTGTGCAGCAAATAGCC\n+CAAGCTAATAATCTACAGCAATTTCTTAAAGACCGAAAATGTATCAACTT\n+CGGCAAAATATGAAAATTTAACATTTATTTTTCAACATTTTTTGCCAAAT\n+AATCGACAAAAATTTCAGTGTCCCGATTTGGATGCGAATTAATTGAGGAC\n+AATTGTGACTTTGTTTTGTAAAAAAACTATTTTTTTTACTGAAAATTACA\n+ATCAAGAATCTAAGGCCAAATCAGAATATTTTCACTGCGGTTTTTTGATC\n+GCAATTCTGCTAAATCGGGTTCCGTGCCAAAAAGCTGACTATTTTTTTTA\n+CTGAGAATTAGCATCAAGAATTTATTGCCAAAACAGAACGCAATTCTGCT\n+AAATCGGGTTGCCCTGCCAAAAGGCTGGCCGTTTTGCGCATTCAATAACA\n+CAAGCTAACGGTTTACAGCACATTCGGAAAGATCCAAAAGAATTTCTATT\n+TCGGCATTTTTTATGTATTATCGTCTAATTTTGGTATGTGTATTCATTCT\n+TTTGGCTGTCCGTTATTAAGCTGCTTTTCTTATTCACATATTCAATGGCG\n+CAGTACATAAAAAGAATGAACCGTGGTTTTCTTGCCCTCTCCATCTCCCT\n+CTACCACTTCCCCTCTCAACAACTATCTCACGCACTCTGTCCCTTCTCGT\n+CTCTGTCACCCTCTCTTTACCTACCTCACTCTGGCACACTCTCTCTACCT\n+ATCTCACTCTGTCACCTTCTTCTACCTAATTCTCTCTGTCGCGCTCGCTC\n+TCTCAACATGCCTTTCGCACCGTATCTCTTCGTTTCTCTGTATGCTTGCT\n+CTCTTAATTTAAGTTTCTCTTTCTTTGTGTTCAATATCTTTCGCGCTCGT\n+GACTCCTATTGACCGGACTCTGAGCGGTGCTCATTAAGAGTGGAGTTTTC\n+GATGATGGCGGGTGGCGCGAACGAGAATTTTTTACATAAAGGTAAGTTGC\n+CAATTCTTTTTTAAATGTGACATACAGCATGCATATTTTATTTATTGTAA\n+TTATATGTGAAAGAATAAAATTATATGTACTTTTATCTAATCTATTGCAT\n+CTATTTTTTCACAGGTAAACGAGTCACACGAGAAGTAAAACACAAACACA\n+AGGATCAAAATAAAATTCAGGTGAGTGAACGAACGTGTTGAAAGTAGTTA\n+ATAGTGCATGTCTATTTAAGTGTAAGAAACAAAAAAGACACAAATGGGCC\n+AATAATTCGTTTCTTTAATAGATAACTGCCTATATTATGTTCAAACTATG\n+CTAAAAAGACCCAAAAGGAAAACATAAAGTATATACCTTGCAAAATGAAT\n+AAAAAGAAATCATATATATTCATATACGTTTGACCGGTACCACCGGTCAT\n+ACATACATAAACATAAACACAAAAAGACTAGAATATAAATACCATATTTT\n+TGATTGGCTTAAAAATGGCACAAAAAAAAGATAAAAACTAGAGTGTCTAG\n+CTTTAAGTGACAATCGACTCGGAAAATGATCAAGAAATAAATGAAAAATT\n+TGTCTTGCGGCTTTTTGGTCGCAATTCTAAATCGGTTTGTATGTATGTCA\n+CTCTTCATGTATACAATTTGGTGGCTTTGTATTCCACAAAAGTCACAGTA\n+GTCACAAAAGTTTTTTTTAC
TGAAAATTACCATCAAGAGTCTATGGCCAA\n+ATCAGAATATTTTCACTGCGGTTTTTTATTTCAATTCTGCTAAATCGGGT\n+TTCCGTGCCAAAAAGCTTTTGCAGCCAACAACAATATTCAATTTTTTGGC\n+AAAAAATCGACAAAAAAATT\n' |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/glimmerHMM_output.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/glimmerHMM_output.gff3 Wed Jul 13 13:36:37 2016 -0400 |
b |
@@ -0,0 +1,22 @@ +##gff-version 3 +##sequence-region contig1 1 45179 +contig1 GlimmerHMM mRNA 641 20329 . - . ID=contig1.path1.gene1;Name=contig1.path1.gene1 +contig1 GlimmerHMM CDS 641 5409 . - 2 ID=contig1.cds1.1;Parent=contig1.path1.gene1;Name=contig1.path1.gene1;Note=final-exon +contig1 GlimmerHMM CDS 12541 12968 . - 1 ID=contig1.cds1.2;Parent=contig1.path1.gene1;Name=contig1.path1.gene1;Note=internal-exon +contig1 GlimmerHMM CDS 14821 14944 . - 2 ID=contig1.cds1.3;Parent=contig1.path1.gene1;Name=contig1.path1.gene1;Note=internal-exon +contig1 GlimmerHMM CDS 15123 15424 . - 1 ID=contig1.cds1.4;Parent=contig1.path1.gene1;Name=contig1.path1.gene1;Note=internal-exon +contig1 GlimmerHMM CDS 17402 17543 . - 2 ID=contig1.cds1.5;Parent=contig1.path1.gene1;Name=contig1.path1.gene1;Note=internal-exon +contig1 GlimmerHMM CDS 17886 17986 . - 1 ID=contig1.cds1.6;Parent=contig1.path1.gene1;Name=contig1.path1.gene1;Note=internal-exon +contig1 GlimmerHMM CDS 20226 20329 . - 0 ID=contig1.cds1.7;Parent=contig1.path1.gene1;Name=contig1.path1.gene1;Note=initial-exon +contig1 GlimmerHMM mRNA 22912 26939 . - . ID=contig1.path1.gene2;Name=contig1.path1.gene2 +contig1 GlimmerHMM CDS 22912 23136 . - 0 ID=contig1.cds2.1;Parent=contig1.path1.gene2;Name=contig1.path1.gene2;Note=final-exon +contig1 GlimmerHMM CDS 23431 23705 . - 2 ID=contig1.cds2.2;Parent=contig1.path1.gene2;Name=contig1.path1.gene2;Note=internal-exon +contig1 GlimmerHMM CDS 26810 26939 . - 0 ID=contig1.cds2.3;Parent=contig1.path1.gene2;Name=contig1.path1.gene2;Note=initial-exon +contig1 GlimmerHMM mRNA 29101 41509 . - . ID=contig1.path1.gene3;Name=contig1.path1.gene3 +contig1 GlimmerHMM CDS 29101 29152 . - 1 ID=contig1.cds3.1;Parent=contig1.path1.gene3;Name=contig1.path1.gene3;Note=final-exon +contig1 GlimmerHMM CDS 31365 33826 . - 0 ID=contig1.cds3.2;Parent=contig1.path1.gene3;Name=contig1.path1.gene3;Note=internal-exon +contig1 GlimmerHMM CDS 33880 34044 . 
- 0 ID=contig1.cds3.3;Parent=contig1.path1.gene3;Name=contig1.path1.gene3;Note=internal-exon +contig1 GlimmerHMM CDS 34107 34619 . - 0 ID=contig1.cds3.4;Parent=contig1.path1.gene3;Name=contig1.path1.gene3;Note=internal-exon +contig1 GlimmerHMM CDS 34674 35311 . - 2 ID=contig1.cds3.5;Parent=contig1.path1.gene3;Name=contig1.path1.gene3;Note=internal-exon +contig1 GlimmerHMM CDS 35384 35766 . - 1 ID=contig1.cds3.6;Parent=contig1.path1.gene3;Name=contig1.path1.gene3;Note=internal-exon +contig1 GlimmerHMM CDS 41472 41509 . - 0 ID=contig1.cds3.7;Parent=contig1.path1.gene3;Name=contig1.path1.gene3;Note=initial-exon |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/tblastN/dbia3.xml.bb |
b |
Binary file test-data/tblastN/dbia3.xml.bb has changed |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/tblastN/dbia3.xml.sorted.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tblastN/dbia3.xml.sorted.bed Wed Jul 13 13:36:37 2016 -0400 |
b |
@@ -0,0 +1,10 @@ +contig1 31366 33826 ci-PA 358 - 31366 33826 0 18 126,114,27,174,186,84,69,45,204,108,117,144,123,150,24,48,495,165, 0,126,240,270,444,630,726,801,846,1056,1167,1290,1440,1566,1722,1746,1794,2295, +contig1 31366 33826 ci-PB 358 - 31366 33826 0 18 126,114,27,174,186,84,69,45,204,108,117,144,123,150,24,48,495,165, 0,126,240,270,444,630,726,801,846,1056,1167,1290,1440,1566,1722,1746,1794,2295, +contig1 33768 34056 ci-PC 0 - 33768 34056 0 5 63,36,51,54,45, 0,75,123,180,243, +contig1 33878 35765 ci-PC 584 - 33878 35765 0 8 99,510,210,432,84,63,48,171, 0,228,792,1002,1506,1596,1662,1716, +contig1 33879 34044 ci-PA 964 - 33879 34044 0 1 165, 0, +contig1 33879 34044 ci-PB 964 - 33879 34044 0 1 165, 0, +contig1 34106 35435 ci-PB 560 - 34106 35435 0 4 510,210,432,51, 0,564,774,1278, +contig1 34106 35765 ci-PA 604 - 34106 35765 0 7 510,210,432,84,63,45,174, 0,564,774,1278,1368,1434,1485, +contig1 41470 41509 ci-PA 1000 - 41470 41509 0 1 39, 0, +contig1 41470 41509 ci-PC 1000 - 41470 41509 0 1 39, 0, |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/tblastN/dbia3.xml.unbb.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tblastN/dbia3.xml.unbb.bed Wed Jul 13 13:36:37 2016 -0400 |
b |
@@ -0,0 +1,10 @@ +contig1 31366 33826 ci-PA 358 - 31366 33826 0 18 126,114,27,174,186,84,69,45,204,108,117,144,123,150,24,48,495,165, 0,126,240,270,444,630,726,801,846,1056,1167,1290,1440,1566,1722,1746,1794,2295, +contig1 31366 33826 ci-PB 358 - 31366 33826 0 18 126,114,27,174,186,84,69,45,204,108,117,144,123,150,24,48,495,165, 0,126,240,270,444,630,726,801,846,1056,1167,1290,1440,1566,1722,1746,1794,2295, +contig1 33768 34056 ci-PC 0 - 33768 34056 0 5 63,36,51,54,45, 0,75,123,180,243, +contig1 33878 35765 ci-PC 584 - 33878 35765 0 8 99,510,210,432,84,63,48,171, 0,228,792,1002,1506,1596,1662,1716, +contig1 33879 34044 ci-PA 964 - 33879 34044 0 1 165, 0, +contig1 33879 34044 ci-PB 964 - 33879 34044 0 1 165, 0, +contig1 34106 35435 ci-PB 560 - 34106 35435 0 4 510,210,432,51, 0,564,774,1278, +contig1 34106 35765 ci-PA 604 - 34106 35765 0 7 510,210,432,84,63,45,174, 0,564,774,1278,1368,1434,1485, +contig1 41470 41509 ci-PA 1000 - 41470 41509 0 1 39, 0, +contig1 41470 41509 ci-PC 1000 - 41470 41509 0 1 39, 0, |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/tblastN/dbia3.xml.unsorted.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tblastN/dbia3.xml.unsorted.bed Wed Jul 13 13:36:37 2016 -0400 |
b |
@@ -0,0 +1,10 @@ +contig1 31366 33826 ci-PA 358 - 31366 33826 0 18 126,114,27,174,186,84,69,45,204,108,117,144,123,150,24,48,495,165, 0,126,240,270,444,630,726,801,846,1056,1167,1290,1440,1566,1722,1746,1794,2295, +contig1 34106 35765 ci-PA 604 - 34106 35765 0 7 510,210,432,84,63,45,174, 0,564,774,1278,1368,1434,1485, +contig1 33879 34044 ci-PA 964 - 33879 34044 0 1 165, 0, +contig1 41470 41509 ci-PA 1000 - 41470 41509 0 1 39, 0, +contig1 33878 35765 ci-PC 584 - 33878 35765 0 8 99,510,210,432,84,63,48,171, 0,228,792,1002,1506,1596,1662,1716, +contig1 33768 34056 ci-PC 0 - 33768 34056 0 5 63,36,51,54,45, 0,75,123,180,243, +contig1 41470 41509 ci-PC 1000 - 41470 41509 0 1 39, 0, +contig1 31366 33826 ci-PB 358 - 31366 33826 0 18 126,114,27,174,186,84,69,45,204,108,117,144,123,150,24,48,495,165, 0,126,240,270,444,630,726,801,846,1056,1167,1290,1440,1566,1722,1746,1794,2295, +contig1 34106 35435 ci-PB 560 - 34106 35435 0 4 510,210,432,51, 0,564,774,1278, +contig1 33879 34044 ci-PB 964 - 33879 34044 0 1 165, 0, |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/tblastN/hubaInputs/tblastn_Dbia3_ci.xml.sorted.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tblastN/hubaInputs/tblastn_Dbia3_ci.xml.sorted.bed Wed Jul 13 13:36:37 2016 -0400 |
b |
@@ -0,0 +1,50 @@ +contig1 31366 33826 ci-PA 358 - 31366 33826 0 18 126,114,27,174,186,84,69,45,204,108,117,144,123,150,24,48,495,165, 0,126,240,270,444,630,726,801,846,1056,1167,1290,1440,1566,1722,1746,1794,2295, +contig1 31366 33826 ci-PB 358 - 31366 33826 0 18 126,114,27,174,186,84,69,45,204,108,117,144,123,150,24,48,495,165, 0,126,240,270,444,630,726,801,846,1056,1167,1290,1440,1566,1722,1746,1794,2295, +contig1 33768 34056 ci-PC 0 - 33768 34056 0 5 63,36,51,54,45, 0,75,123,180,243, +contig1 33878 35765 ci-PC 584 - 33878 35765 0 8 99,510,210,432,84,63,48,171, 0,228,792,1002,1506,1596,1662,1716, +contig1 33879 34044 ci-PA 964 - 33879 34044 0 1 165, 0, +contig1 33879 34044 ci-PB 964 - 33879 34044 0 1 165, 0, +contig1 34106 35435 ci-PB 560 - 34106 35435 0 4 510,210,432,51, 0,564,774,1278, +contig1 34106 35765 ci-PA 604 - 34106 35765 0 7 510,210,432,84,63,45,174, 0,564,774,1278,1368,1434,1485, +contig2 16199 18659 ci-PA 358 - 16199 18659 0 18 126,114,27,174,186,84,69,45,204,108,117,144,123,150,24,48,495,165, 0,126,240,270,444,630,726,801,846,1056,1167,1290,1440,1566,1722,1746,1794,2295, +contig2 16199 18659 ci-PB 358 - 16199 18659 0 18 126,114,27,174,186,84,69,45,204,108,117,144,123,150,24,48,495,165, 0,126,240,270,444,630,726,801,846,1056,1167,1290,1440,1566,1722,1746,1794,2295, +contig2 18601 18889 ci-PC 0 - 18601 18889 0 5 63,36,51,54,45, 0,75,123,180,243, +contig2 18711 20598 ci-PC 584 - 18711 20598 0 8 99,510,210,432,84,63,48,171, 0,228,792,1002,1506,1596,1662,1716, +contig2 18712 18877 ci-PA 964 - 18712 18877 0 1 165, 0, +contig2 18712 18877 ci-PB 964 - 18712 18877 0 1 165, 0, +contig2 18939 20268 ci-PB 560 - 18939 20268 0 4 510,210,432,51, 0,564,774,1278, +contig2 18939 20598 ci-PA 604 - 18939 20598 0 7 510,210,432,84,63,45,174, 0,564,774,1278,1368,1434,1485, +contig23 23849 24035 ci-PA 0 - 23849 24035 0 3 51,21,96, 0,51,90, +contig23 23849 24035 ci-PB 0 - 23849 24035 0 3 51,21,96, 0,51,90, +contig23 32405 32564 ci-PA 0 - 32405 32564 0 2 63,96, 0,63, 
+contig23 32405 32564 ci-PB 0 - 32405 32564 0 2 63,96, 0,63, +contig23 32405 32573 ci-PA 0 - 32405 32573 0 4 21,42,90,15, 0,21,63,153, +contig23 32405 32573 ci-PB 0 - 32405 32573 0 4 21,42,90,15, 0,21,63,153, +contig23 32471 32564 ci-PC 0 - 32471 32564 0 1 93, 0, +contig24 3846 4032 ci-PA 0 - 3846 4032 0 3 51,21,96, 0,51,90, +contig24 3846 4032 ci-PB 0 - 3846 4032 0 3 51,21,96, 0,51,90, +contig24 12402 12561 ci-PA 0 - 12402 12561 0 2 63,96, 0,63, +contig24 12402 12561 ci-PB 0 - 12402 12561 0 2 63,96, 0,63, +contig24 12402 12570 ci-PA 0 - 12402 12570 0 4 21,42,90,15, 0,21,63,153, +contig24 12402 12570 ci-PB 0 - 12402 12570 0 4 21,42,90,15, 0,21,63,153, +contig24 12468 12561 ci-PC 0 - 12468 12561 0 1 93, 0, +contig66 33180 33312 ci-PA 0 - 33180 33312 0 1 132, 0, +contig66 33180 33312 ci-PB 0 - 33180 33312 0 1 132, 0, +contig66 33204 33303 ci-PA 0 - 33204 33303 0 1 99, 0, +contig66 33204 33303 ci-PB 0 - 33204 33303 0 1 99, 0, +contig66 35474 35663 ci-PA 0 - 35474 35663 0 1 189, 0, +contig66 35474 35663 ci-PB 0 - 35474 35663 0 1 189, 0, +contig66 35516 35762 ci-PA 0 - 35516 35762 0 4 81,51,21,93, 0,81,132,153, +contig66 35516 35762 ci-PB 0 - 35516 35762 0 4 81,51,21,93, 0,81,132,153, +contig66 35534 35756 ci-PA 0 - 35534 35756 0 4 111,24,51,36, 0,111,135,186, +contig66 35534 35756 ci-PB 0 - 35534 35756 0 4 111,24,51,36, 0,111,135,186, +contig67 18177 18309 ci-PA 0 - 18177 18309 0 1 132, 0, +contig67 18177 18309 ci-PB 0 - 18177 18309 0 1 132, 0, +contig67 18201 18300 ci-PA 0 - 18201 18300 0 1 99, 0, +contig67 18201 18300 ci-PB 0 - 18201 18300 0 1 99, 0, +contig67 20471 20660 ci-PA 0 - 20471 20660 0 1 189, 0, +contig67 20471 20660 ci-PB 0 - 20471 20660 0 1 189, 0, +contig67 20513 20759 ci-PA 0 - 20513 20759 0 4 81,51,21,93, 0,81,132,153, +contig67 20513 20759 ci-PB 0 - 20513 20759 0 4 81,51,21,93, 0,81,132,153, +contig67 20531 20753 ci-PA 0 - 20531 20753 0 4 111,24,51,36, 0,111,135,186, +contig67 20531 20753 ci-PB 0 - 20531 20753 0 4 111,24,51,36, 0,111,135,186, |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/tblastN/readme/README.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tblastN/readme/README.html Wed Jul 13 13:36:37 2016 -0400 |
b |
@@ -0,0 +1,36 @@ +<h1 id="conversion-of-ncbi-blast-tblastn-results-to-psl-format">Conversion of NCBI BLAST+ tblastn results to PSL format</h1> +<p>Wilson Leung <script type="text/javascript"> +<!-- +h='wustl.edu';a='@';n='wleung';e=n+a+h; +document.write('<a h'+'ref'+'="ma'+'ilto'+':'+e+'" clas'+'s="em' + 'ail">'+e+'<\/'+'a'+'>'); +// --> +</script><noscript>wleung at wustl dot edu</noscript></p> +<p>Last Update: 04/24/2016</p> +<h2 id="version-information">Version information</h2> +<ul> +<li>Kent source tree: v324</li> +<li>NCBI BLAST+: BLAST 2.2.30+</li> +</ul> +<h2 id="data-sources">Data sources</h2> +<p>For testing purposes, the database consists of only contig1 in the Dbia3 assembly while the protein sequences correspond to the three isoforms of the <em>D. melanogaster</em> <em>ci</em> gene in contig1. The protein sequences are available through <a href="http://flybase.org/cgi-bin/getseq.html?source=dmel&id=FBgn0004859&chr=4&dump=PrecompiledFasta&targetset=translation">FlyBase</a>.</p> +<ul> +<li>Dbia3.fa = contig1 sequence in the Dbia3 assembly</li> +<li>ci.pep = Protein sequences for the three isoforms of the <em>ci</em> gene in <em>D. 
melanogaster</em></li> +</ul> +<h2 id="conversion-protocol">Conversion protocol</h2> +<ol style="list-style-type: decimal"> +<li><p>Create BLAST database for the assembly</p> +<pre><code>makeblastdb -in Dbia3.fa -dbtype nucl</code></pre></li> +<li><p>Perform tblastn search and output results in XML format</p> +<pre><code>tblastn -outfmt 5 -db Dbia3.fa -query ci.pep -out tblastn_Dbia3_ci.xml -evalue 1e-2</code></pre></li> +<li><p>Convert results into PSL format</p> +<pre><code>blastXmlToPsl -convertToNucCoords tblastn_Dbia3_ci.xml tblastn_Dbia3_ci.xml.psl</code></pre></li> +<li><p>Convert PSL output into BED format</p> +<pre><code>pslToBed tblastn_Dbia3_ci.xml.psl tblastn_Dbia3_ci.xml.bed</code></pre></li> +</ol> +<h2 id="output-files">Output files</h2> +<ul> +<li>tblastn_Dbia3_ci.xml = tblastn results in XML format</li> +<li>tblastn_Dbia3_ci.xml.psl = tblastn results in PSL format</li> +<li>tblastn_Dbia3_ci.xml.bed = tblastn results in BED format</li> +</ul> |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/tblastN/readme/README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tblastN/readme/README.md Wed Jul 13 13:36:37 2016 -0400 |
[ |
@@ -0,0 +1,48 @@ +Conversion of NCBI BLAST+ tblastn results to PSL format +======================================================= +Wilson Leung <wleung@wustl.edu> + +Last Update: 04/24/2016 + + +Version information +------------------- +* Kent source tree: v324 +* NCBI BLAST+: BLAST 2.2.30+ + +Data sources +------------------- +For testing purposes, the database consists of only contig1 in the Dbia3 assembly while the protein sequences correspond to the three isoforms of the *D. melanogaster* *ci* gene in contig1. The protein sequences are available through [FlyBase](http://flybase.org/cgi-bin/getseq.html?source=dmel&id=FBgn0004859&chr=4&dump=PrecompiledFasta&targetset=translation). + +* Dbia3.fa = contig1 sequence in the Dbia3 assembly +* ci.pep = Protein sequences for the three isoforms of the *ci* gene in *D. melanogaster* + +Conversion protocol +----------------------- +1. Create BLAST database for the assembly +``` +makeblastdb -in Dbia3.fa -dbtype nucl +``` + +2. Perform tblastn search and output results in XML format +``` +tblastn -outfmt 5 -db Dbia3.fa -query ci.pep -out tblastn_Dbia3_ci.xml -evalue 1e-2 +``` + +3. Convert results into PSL format +``` +blastXmlToPsl -convertToNucCoords tblastn_Dbia3_ci.xml tblastn_Dbia3_ci.xml.psl +``` + +4. Convert PSL output into BED format +``` +pslToBed tblastn_Dbia3_ci.xml.psl tblastn_Dbia3_ci.xml.bed +``` + +Output files +----------------------- +* tblastn_Dbia3_ci.xml = tblastn results in XML format +* tblastn_Dbia3_ci.xml.psl = tblastn results in PSL format +* tblastn_Dbia3_ci.xml.bed = tblastn results in BED format + + |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/tblastN/workflowInputs/ci.pep --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tblastN/workflowInputs/ci.pep Wed Jul 13 13:36:37 2016 -0400 |
b |
@@ -0,0 +1,45 @@ +>ci-PA FBpp0088245 +MDAYALPTYFPLAYSELQFLASRRAAAVAAAATVLPGSPCINQHHPTDVSSSVTVPSIIPTGGTSDSIKTSIQPQICNEN +TLLGNAGHQHNHQPQHVHNINVTGQPHDFHPAYRIPGYMEQLYSLQRTNSASSFHDPYVNCASAFHLAGLGLGSADFLGS +RGLSSLGELHNAAVAAAAAGSLASTDFHFSVDGNRRLGSPRPPGGSIRASISRKRALSSSPYSDSFDINSMIRFSPNSLA +TIMNGSRGSSAASGSYGHISATALNPMSHVHSTRLQQIQAHLLRASAGLLNPMTPQQVAASGFSIGHMPTSASLRVNDVH +PNLSDSHIQITTSPTVTKDVSQVPAAAFSLKNLDDAREKKGPFKDVVPEQPSSTSGGVAQVEADSASSQLSDRCYNNVVN +NITGIPGDVKVNSRLDEYINCGSISIPSNEYDCANADTTDIKDEPGDFIETNCHWRSCRIEFITQDELVKHINNDHIQTN +KKAFVCRWEDCTRGEKPFKAQYMLVVHMRRHTGEKPHKCTFEGCFKAYSRLENLKTHLRSHTGEKPYTCEYPGCSKAFSN +ASDRAKHQNRTHSNEKPYICKAPGCTKRYTDPSSLRKHVKTVHGAEFYANKKHKGLPLNDANSRLQQNNSRHNLQEHNID +SSPCSEDSHLGKMLGTSSPSIKSESDISSSNHHLVNGVRASDSLLTYSPDDLAENLNLDDGWNCDDDVDVADLPIVLRAM +VNIGNGNASASTIGGSVLARQRFRGRLQTKGINSSTIMLCNIPESNRTFGISELNQRITELKMEPGTDAEIKIPKLPNTT +IGGYTEDPLQNQTSFRNTVSNKQGTVSGSIQGQFRRDSQNSTASTYYGSMQSRRSSQSSQVSSIPTMRPNPSCNSTASFY +DPISPGCSRRSSQMSNGANCNSFTSTSGLPVLNKESNKSLNACINKPNIGVQGVGIYNSSLPPPPSSHLIATNLKRLQRK +DSEYHNFTSGRFSVPSYMHSLHIKNNKPVGENEFDKAIASNARRQTDPVPNINLDPLTNISRFSTTPHSFDINVGKTNNI +ASSINKDNLRKDLFTVSIKADMAMTSDQHPNERINLDEVEELILPDEMLQYLNLVKDDTNHLEKEHQAVPVGSNVSETIA +SNHYREQSNIYYTNKQILTPPSNVDIQPNTTKFTVQDKFAMTAVGGSFSQRELSTLAVPNEHGHAKCESFHHQSQKYMNT +DIGSKQQSALPSAHQRQTEKSNYNQIIDSSMTSLPELNVDSIYPRNETENIFKVHGDHDNEIQCGIISQSQMSPSTNLNN +DGQFSTVNMQPITTSKLFPPEPQKIVCDTQASNTSVMHLDTYQRTLEYVQSCQNWMETNNTSTNQIQSLPGMPVNNTLFP +DVSSSTHPYHGTNMVINDMTTSLTSLLEENRYLQMMQ +>ci-PC FBpp0300417 +MDAYALPTYFPLAYSELQFLASRRAAAVAAAATVLPGSPCINQHHPTDVSSSVTVPSIIPTGGTSDSIKTSIQPQICNEN +TLLGNAGHQHNHQPQHVHNINVTGQPHDFHPAYRIPGYMEQLYSLQRTNSASSFHDPYVNCASAFHLAGLGLGSADFLGS +RGLSSLGELHNAAVAAAAAGSLASTDFHFSVDGNRRLGSPRPPGGSIRASISRKRALSSSPYSDSFDINSMIRFSPNSLA +TIMNGSRGSSAASGSYGHISATALNPMSHVHSTRLQQIQAHLLRASAGLLNPMTPQQVAASGFSIGHMPTSASLRVNDVH +PNLSDSHIQITTSPTVTKDVSQVPAAAFSLKNLDDAREKKGPFKDVVPEQPSSTSGGVAQVEADSASSQLSDRCYNNVVN +NITGIPGDVKVNSRLDEYINCGSISIPSNEYDCANADTTDIKDEPGDFIETNCHWRSCRIEFITQDELVKHINNDHIQTN 
+KKAFVCRWEDCTRGEKPFKAQYMLVVHMRRHTGEKPHKCTEKNHTLASIRDVAKPLVMLVIAQSIKIEHTVMRNRTFVKH +LDAQNVTPTRAL +>ci-PB FBpp0297298 +MEQLYSLQRTNSASSFHDPYVNCASAFHLAGLGLGSADFLGSRGLSSLGELHNAAVAAAAAGSLASTDFHFSVDGNRRLG +SPRPPGGSIRASISRKRALSSSPYSDSFDINSMIRFSPNSLATIMNGSRGSSAASGSYGHISATALNPMSHVHSTRLQQI +QAHLLRASAGLLNPMTPQQVAASGFSIGHMPTSASLRVNDVHPNLSDSHIQITTSPTVTKDVSQVPAAAFSLKNLDDARE +KKGPFKDVVPEQPSSTSGGVAQVEADSASSQLSDRCYNNVVNNITGIPGDVKVNSRLDEYINCGSISIPSNEYDCANADT +TDIKDEPGDFIETNCHWRSCRIEFITQDELVKHINNDHIQTNKKAFVCRWEDCTRGEKPFKAQYMLVVHMRRHTGEKPHK +CTFEGCFKAYSRLENLKTHLRSHTGEKPYTCEYPGCSKAFSNASDRAKHQNRTHSNEKPYICKAPGCTKRYTDPSSLRKH +VKTVHGAEFYANKKHKGLPLNDANSRLQQNNSRHNLQEHNIDSSPCSEDSHLGKMLGTSSPSIKSESDISSSNHHLVNGV +RASDSLLTYSPDDLAENLNLDDGWNCDDDVDVADLPIVLRAMVNIGNGNASASTIGGSVLARQRFRGRLQTKGINSSTIM +LCNIPESNRTFGISELNQRITELKMEPGTDAEIKIPKLPNTTIGGYTEDPLQNQTSFRNTVSNKQGTVSGSIQGQFRRDS +QNSTASTYYGSMQSRRSSQSSQVSSIPTMRPNPSCNSTASFYDPISPGCSRRSSQMSNGANCNSFTSTSGLPVLNKESNK +SLNACINKPNIGVQGVGIYNSSLPPPPSSHLIATNLKRLQRKDSEYHNFTSGRFSVPSYMHSLHIKNNKPVGENEFDKAI +ASNARRQTDPVPNINLDPLTNISRFSTTPHSFDINVGKTNNIASSINKDNLRKDLFTVSIKADMAMTSDQHPNERINLDE +VEELILPDEMLQYLNLVKDDTNHLEKEHQAVPVGSNVSETIASNHYREQSNIYYTNKQILTPPSNVDIQPNTTKFTVQDK +FAMTAVGGSFSQRELSTLAVPNEHGHAKCESFHHQSQKYMNTDIGSKQQSALPSAHQRQTEKSNYNQIIDSSMTSLPELN +VDSIYPRNETENIFKVHGDHDNEIQCGIISQSQMSPSTNLNNDGQFSTVNMQPITTSKLFPPEPQKIVCDTQASNTSVMH +LDTYQRTLEYVQSCQNWMETNNTSTNQIQSLPGMPVNNTLFPDVSSSTHPYHGTNMVINDMTTSLTSLLEENRYLQMMQ |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/tblastN/workflowInputs/dbia3.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tblastN/workflowInputs/dbia3.fa Wed Jul 13 13:36:37 2016 -0400 |
b |
b'@@ -0,0 +1,66784 @@\n+>contig1\n+AAAACTAATTTTATCAAAATCGGACAACTATATCATATAGCTGCCATACG\n+AACGATCGGAAAATTGGTAAGTAAATAATTAAAAATATTATATCTTTGGT\n+GTGTTTTTGGAAATAACATTTTTTAATATGTTTTGAATTTTGAATTAAAT\n+TCTATCAAAATCGGACGACTATATCATATAGCTGCCATACAAACGATCGG\n+AAAATTGGTGAATAAATAATATGAAAAAATTATATCTTTGGTTATTTTTT\n+AGACAAATAACCTCCAACGCTTGGAAATAACATTTTTTAATTAGTTATAA\n+ATTTGATATTTTAATTTTATCAAAATCGGACGAATATAGCATATAGCTGT\n+TAAAATAATATGAAACAAATTATAGCTCCGGTGTTTTTTACATATTATCT\n+TATACTATTGGGAAAATAGTTTATTATATTTTAAAGAATTTCCAATTAAA\n+CTCTAACATATAGCTTTCAAAGAAACGGTCAAAAAAGTAAAGAAATCATT\n+TTTTTTTAACATCACTGAAGCTAGAAACAATCCTTAAAAATGTAACATGG\n+TGTTAGTAGCATTGAAAATTGCTTATAACTGCAAAGGGAAAACAAACATC\n+GGCTTGCCGAATGTAATTTCCATTCTTGTTTGACTTGAGTTTATAACTTA\n+CAATTATGGTAAGGTGCCTGATTTTGGTTTTTGCCATACGGTATGTGGTA\n+GTTATCATTTTGCCGTGCTATAAGTGACATCACAGCCGATTCTTTTACTC\n+CATAATGGGCCAATGTGTTGAGGCGTTTCCATCCATTTATTGTTTTTGTA\n+GTTAGGTCTTCATCTTGTAATGTGAGATGACCTCCTCTTCCATGCCTCCA\n+TTCTAAGTCAACTTCGTGCACAGATGGTCTCATAGAAAACGGAGTATTTT\n+TAAAAATTGCGTCCAAAATTTTTAATTTAACTTGCGATATGGTATCCAAA\n+TCGTTTACACGACATTGGACTTTTTCATCAAGATCATCTTGTAAAATGTG\n+CAAAATTACAACAGAATGGGTAACTTGTTCGTGGAGAAGTCGTTCCTCTG\n+AAAGTGAATAACGAGCATCGTGGGTTATTGCGTCCACCAAACCCTTTTCA\n+ATTTGATGCTTAATTGCCTTGAACAGCAAAAATAAATTAGATCCAGCATA\n+TTCTTTTAGGTAGTCGTACATACAAATTGCTAAGTAGTTTGTTAACATTT\n+TTTCAACTACGCTCTCAGTGCGTCGTAGCATTAGCTGAGGATGCTTGCTG\n+GCGAGCGATTTGTCAATTAATCGCAATAAAAGGGACTTTAAAATTTCCGT\n+CGCATATTCCATTTTGTTCATTAGAACAACCATAAGTAAAGAGGCGACGT\n+TAACTCGATCGCGAATTGAGAAAGATGACCGTTGAGCTTCTAAAGTTTCT\n+ATGAACAATAGTAAAAAATATTTGTTTCCAATAAGTTGCTCGAATTGTAT\n+CATAGCTGCATCATAGTTAGTGTGCGGGCTACTTCCACAAAATTTTCGGG\n+AGTTTAGAATAGGATGATCTGATACACCGGGAAAGAAAACTTTCATAATG\n+TAATTGACGTGATCTAACGTTGGTATACCGGTGCTCTCCAAATCTGCTGT\n+TAGATCGGTCATGTCCGTTTGGAGCTCAGCAAATGCCTGTTTACACTCAG\n+AACGAACGTTGCTTTCCAATGTTATCATCTGTATCTGAATTCGTTTGTAT\n+TCCCGTTCTGCTTGGGTCGATTTCCTTCTAAATATTATTAGTACGACAAC\n+CAAAACGATGACAAGCACTGCAACTGTCAATATAACGACAAACATGGCAT\n+GTGAAAAAACATAGGGTTTATTTAAATCATATTTCAAATATCCTATGGCG\n+AAAC
GAAGATTTCGCCCTACTTTAACCACAACTAGAGGTAAATCTGTCGA\n+TTGATCCACACCATTTTCATCAGTTGGGAGTGGTTGATGTTCCGGTGGAA\n+TGCACAAAAGTTGAGTTAGTGTAAGGCTTGTTATATTGCATTGAGTAGTA\n+CCAATGGTTACATTAACGTCGTATTCATCAGCTGCCAAATTTAGTAGCTC\n+GCCTTCAATAACCAAGCTGTCACCCTTGTATAGTTTAATTCCGTCATTTG\n+GAAATGGTAAATATTTTGGATCGGCTAAATAAACAATAGTGCTCCTGATA\n+TCATGAAAGTATTTATTTAGATCTCGAACAAGCTGTACATTGTCCATTAC\n+AAAACTAAGTTGTAAATTCAGTTGCGTTTCATGGACCTTTACAAAAGTTG\n+TAACATCCATATTATTATTGACGAAATAGCTAGGCGCAGTTCCCCCTGTT\n+GTGTATATATGAAAGTTGTCTACGAAGTCTGCTTTTCGCTTTCGTCTAGA\n+ATATTCGTTCTGATGCGCACTTTCATTTTTAAAGTCAAATTCCGAATCCA\n+TCACTCTGTTAGAACTTTTAAATGCTTGAAATTTATCATTTACCGGAGGA\n+GATGGGCATTCCATTTGATTTGAATTGATTACTACGCAAGATGTTTTGTT\n+TACTCTTTCATTATCAAAAAAAACTTCAAGTTCAGGCTTTTGAATTGAAT\n+TAAGATACATTCCATGAACGGTCAGAACGCGACCACCACTTACAAAACTG\n+CGTAAAGGCTTAATCTGCATTATACGCGGGTCTTGGGTATAGTTAAAAAT\n+AGAACAGGGCTGCCTTGGTAACGTACGAAATTGGTATGAACCGAAATTAC\n+TTCGAGTTAGATTTTTATTGGGTATAGTTGCGGGTGAGATATGACATTCT\n+AAAGTTCGGTTGGCACCGTCAATAAGAAGGTGTAGAGACCGTATTGGTTC\n+TGGCTGGGTCGCCTCTGATGTAGTACTACTAACTTGAGTTGAAGAAGCTT\n+GTGTAACGTTTATATGACACTCGTACTCATCTAAATATGCTCGCATGGTT\n+GATCCAATGTTCAAAAACTTTCCTATTAACGACAATTGTGTTCCTCCCGA\n+CCTGGGCCCAATCGTGGGATATAATCCTGTTAGCAAAACATTCTTAAAGT\n+GAAATTGTACACTAGATTCAGTATACCCTGCATCATTTGCAACCTTTATA\n+GGCGCTGACATTTCATACATCACTGCTCCAGTGCGGCATTCGATCTTTAC\n+AGAAATTTGGTAGTTGACTAGTTCACAAGGCACAGACCCAATAAATATTT\n+TTCCACGCACATCTCCTTCGCGAATGCCCAAATTACTGCCTTCAATTGTA\n+ATAAGGGTACCACCCTCAATAGGTCCAGATAACGGTTTAATAATATCGAT\n+CCTTGGTAAAGGGCACTCGTTTTCTTTAGTTGTTTTTGATCCTGCACTAA\n+TAGCACGATCACTGTCTGCTATGCAAGTCTCGTTATATACACATGAGTTG\n+CTGCACCAAGCACATTGGTATTTTGGATCACGAGTGACACACAAACTGCA\n+ATCAGCATGATCCCGATGTGAGCCCAGTACGTCGCATTTATATAATGTAA\n+CAATCGCCGTGTCCACATAGTGCTGGAAATTCCATGTAATTACAACCTTT\n+GCTTGGTATTCATGTGTATTAGTCTCGTAGAAATAAGGTGTTTTTTCACA\n+AACAACAATCTTGTTTGACTCAATGTGGGCAGGCAATAGCATCTGAGCAG\n+CTTCAATATGAACTGTACACAAGAATCCAGCGTGAGCGCTTTTGGGTTTT\n+GGTAAGTTTTCTATCTCTAAACGAATTTCTTTTGGCACCCGCACCGGTAA\n+AAGAATCGCCGGACGATTGCTTTTTAAATGGGGGCATTGACCA
ACACTAG\n+TTACCGAATTTTCTATATTACGACATTGTATTGATTGATGGACACATTTA\n+TTGTCAAAAATACACC'..b'CCACTGGACCCGTTGGATAGCGTC\n+GGCACTTTTCTGTTTGGTGCCATTCACTGGTCACTAGTAGGACCGCTCTG\n+CCTATTAAAATAGGAGTGCTGCTTAGAATTTATTTCAGCAAGACAGCCGT\n+AAGATATATATGAATAGTATTTAACGGCAGTGGCAGAGGCCTTTACAATA\n+TAAGGCTTTACGGGATTTTAAACAATGAAAAACAAAAATATTTTTATAGA\n+GTCGAGATAAAATACTTTTTACAATAAATACTAAGTATTTTTAAAAGATA\n+AAAATATTTAAAAGTAAATACTGAATATTTTAACAGGTGTTAAGTACTGG\n+GAACTGTGACTAAGGACGGCGGTATATCGCTACCATAATATTTTTAAAGG\n+AACTGAATACTGAAAGATGTGACCAAAATGGTAGAATATCGCTAACGAAA\n+ATCACAGACGCAAATATCGATAGTGGCCTAGAGACCCGATATCGCCGCCG\n+TGATTATCGATGAAACTACAGCTGAGCTGCTTGTTTACATTCTAAAGTTA\n+AAATTTTTAAAAATTTCCAATACAAAAAAAAATTCTTCGTATTAACACGG\n+CTGTTTGCCGGTAAGATCTGAAGTGGATTCAAGTTGCAGCACCCTCAGGA\n+AAAGAATAAATTATTATAATATAATAATATAATATAAAGAATTTAAATTC\n+GAACCAAACGCGGTATGTCACTCTTCATTTAGAGAATTTGTGGCTTTGTT\n+TTGTACAAAAAACTATTCTTTTTTCTTGAAAATTAGCATCAAGAATTTAT\n+TGCCAAAACAGAACGCAATTCTGCTAAATCTGGTTACCCTGCCAAATGGC\n+TGGCCGTTCTGTGCATTCAATAACACAAGCTAACGGCTTACGGCACATTC\n+GGAAAGATCCAAAAGAATTTCTATTCCGGCATTTTTTATGTATTATCGTC\n+CAATTTTTGTATGTGTATTCATTCTTTTGGCTGTCCGTTATTAAGCTTTT\n+CTTATCCACATAAGCTGACCAGCTGCCAAAAAGCCGACCGTTTTGTGCAG\n+CCAATAACCCAAGCTAACTATCAACAGCAATTTCGAAAAATCCAGAAGAA\n+GCTATCCTTCGGCAATTTTTTCTTATATTATAATTTTTTTGTCACAATTT\n+TTTGTCAAAAAATCGACAATAATTAAGTGTTTACATTTTAATGCGATTTA\n+ATTGGAAATTTGAATTCGAATCAAACGCGGTGTCACTCTACATATGGACA\n+ATTTGTGGCTACAAAATTAGTTGAAAGTGCATGTTTTTTTTTAAGTGAAA\n+TAAAACAAAAACAGCAATAACAGGCCAAAATGTTGTTTTATAAATAAATA\n+CTTACATATCCGCATATAAATATGAATGAAAAAAAAAACAGCAAGACTGG\n+GCCAAAATATTGTTCTTTAATATGTAAATAAATACATATCTACATATACA\n+TATGTGGACAAAAGTGTTAAAATTAGTTAATAGTGCATGTTTTTTAAATG\n+AAAAAATAAAACAAAAAAAGCAAGCAAATATAAATTATATTCCACACACA\n+TTTACAATAATTTCAAACTACGCTAAAAATACCCAACAGAAAAAACATAG\n+AAAGGTAAACAAAATACAAGAATATCAAAAAAAATCTAATTTTCACGTTG\n+TTGTGGCCGGTACCACCGGTCATACATACATATAGATACATATTTAGATT\n+CTCACAAAAAGAATAGAAAATTACCGCTAATTTCCTTATATACGTTTTTG\n+TTTTCAGTGTTTTGAAATAGTATTTTTCAAACAAAAAACAATACAAAAGG\n+GTTAAAAAAATTGTTCGATTGGCTTTAACATACT
AATTAAAAAAAATCAC\n+AATTTTTTGGCAAAATATCGAAAAAAATTAAGTGTTTACATTTTTATGCG\n+AATTATTTGGAAATTTAAATAAGAACCCAACGCGGTTTGTCACTCTACAT\n+ATGGACAATTTGTGGCTTTGTTTTGAACAAAAAATGAATTTTTTTTTACT\n+CAAAATTAACAAGAAGAAACAGTAATGCGGGTTTTGATCGCAATTCTGCT\n+AAATCGAATTTCCGTGCCCAAAGGCTGACCTTTTTGTGCAGCAAATAGCC\n+CAAGCTAATAATCTACAGCAATTTCTTAAAGACCGAAAATGTATCAACTT\n+CGGCAAAATATGAAAATTTAACATTTATTTTTCAACATTTTTTGCCAAAT\n+AATCGACAAAAATTTCAGTGTCCCGATTTGGATGCGAATTAATTGAGGAC\n+AATTGTGACTTTGTTTTGTAAAAAAACTATTTTTTTTACTGAAAATTACA\n+ATCAAGAATCTAAGGCCAAATCAGAATATTTTCACTGCGGTTTTTTGATC\n+GCAATTCTGCTAAATCGGGTTCCGTGCCAAAAAGCTGACTATTTTTTTTA\n+CTGAGAATTAGCATCAAGAATTTATTGCCAAAACAGAACGCAATTCTGCT\n+AAATCGGGTTGCCCTGCCAAAAGGCTGGCCGTTTTGCGCATTCAATAACA\n+CAAGCTAACGGTTTACAGCACATTCGGAAAGATCCAAAAGAATTTCTATT\n+TCGGCATTTTTTATGTATTATCGTCTAATTTTGGTATGTGTATTCATTCT\n+TTTGGCTGTCCGTTATTAAGCTGCTTTTCTTATTCACATATTCAATGGCG\n+CAGTACATAAAAAGAATGAACCGTGGTTTTCTTGCCCTCTCCATCTCCCT\n+CTACCACTTCCCCTCTCAACAACTATCTCACGCACTCTGTCCCTTCTCGT\n+CTCTGTCACCCTCTCTTTACCTACCTCACTCTGGCACACTCTCTCTACCT\n+ATCTCACTCTGTCACCTTCTTCTACCTAATTCTCTCTGTCGCGCTCGCTC\n+TCTCAACATGCCTTTCGCACCGTATCTCTTCGTTTCTCTGTATGCTTGCT\n+CTCTTAATTTAAGTTTCTCTTTCTTTGTGTTCAATATCTTTCGCGCTCGT\n+GACTCCTATTGACCGGACTCTGAGCGGTGCTCATTAAGAGTGGAGTTTTC\n+GATGATGGCGGGTGGCGCGAACGAGAATTTTTTACATAAAGGTAAGTTGC\n+CAATTCTTTTTTAAATGTGACATACAGCATGCATATTTTATTTATTGTAA\n+TTATATGTGAAAGAATAAAATTATATGTACTTTTATCTAATCTATTGCAT\n+CTATTTTTTCACAGGTAAACGAGTCACACGAGAAGTAAAACACAAACACA\n+AGGATCAAAATAAAATTCAGGTGAGTGAACGAACGTGTTGAAAGTAGTTA\n+ATAGTGCATGTCTATTTAAGTGTAAGAAACAAAAAAGACACAAATGGGCC\n+AATAATTCGTTTCTTTAATAGATAACTGCCTATATTATGTTCAAACTATG\n+CTAAAAAGACCCAAAAGGAAAACATAAAGTATATACCTTGCAAAATGAAT\n+AAAAAGAAATCATATATATTCATATACGTTTGACCGGTACCACCGGTCAT\n+ACATACATAAACATAAACACAAAAAGACTAGAATATAAATACCATATTTT\n+TGATTGGCTTAAAAATGGCACAAAAAAAAGATAAAAACTAGAGTGTCTAG\n+CTTTAAGTGACAATCGACTCGGAAAATGATCAAGAAATAAATGAAAAATT\n+TGTCTTGCGGCTTTTTGGTCGCAATTCTAAATCGGTTTGTATGTATGTCA\n+CTCTTCATGTATACAATTTGGTGGCTTTGTATTCCACAAAAGTCACAGTA\n+GTCACAAAAGTTTTTTTTAC
TGAAAATTACCATCAAGAGTCTATGGCCAA\n+ATCAGAATATTTTCACTGCGGTTTTTTATTTCAATTCTGCTAAATCGGGT\n+TTCCGTGCCAAAAAGCTTTTGCAGCCAACAACAATATTCAATTTTTTGGC\n+AAAAAATCGACAAAAAAATT\n' |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/tblastN/workflowInputs/tblastn_Dbia3_ci.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tblastN/workflowInputs/tblastn_Dbia3_ci.xml Wed Jul 13 13:36:37 2016 -0400 |
b |
b'@@ -0,0 +1,1174 @@\n+<?xml version="1.0"?>\n+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">\n+<BlastOutput>\n+ <BlastOutput_program>tblastn</BlastOutput_program>\n+ <BlastOutput_version>TBLASTN 2.3.0+</BlastOutput_version>\n+ <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>\n+ <BlastOutput_db>dbia3.fasta</BlastOutput_db>\n+ <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>\n+ <BlastOutput_query-def>ci-PA FBpp0088245</BlastOutput_query-def>\n+ <BlastOutput_query-len>1397</BlastOutput_query-len>\n+ <BlastOutput_param>\n+ <Parameters>\n+ <Parameters_matrix>BLOSUM62</Parameters_matrix>\n+ <Parameters_expect>0.01</Parameters_expect>\n+ <Parameters_gap-open>11</Parameters_gap-open>\n+ <Parameters_gap-extend>1</Parameters_gap-extend>\n+ <Parameters_filter>L;</Parameters_filter>\n+ </Parameters>\n+ </BlastOutput_param>\n+<BlastOutput_iterations>\n+<Iteration>\n+ <Iteration_iter-num>1</Iteration_iter-num>\n+ <Iteration_query-ID>Query_1</Iteration_query-ID>\n+ <Iteration_query-def>ci-PA FBpp0088245</Iteration_query-def>\n+ <Iteration_query-len>1397</Iteration_query-len>\n+<Iteration_hits>\n+<Hit>\n+ <Hit_num>1</Hit_num>\n+ <Hit_id>gnl|BL_ORD_ID|1</Hit_id>\n+ <Hit_def>contig2</Hit_def>\n+ <Hit_accession>1</Hit_accession>\n+ <Hit_len>45017</Hit_len>\n+ <Hit_hsps>\n+ <Hsp>\n+ <Hsp_num>1</Hsp_num>\n+ <Hsp_bit-score>852.818</Hsp_bit-score>\n+ <Hsp_score>2202</Hsp_score>\n+ <Hsp_evalue>0</Hsp_evalue>\n+ <Hsp_query-from>576</Hsp_query-from>\n+ <Hsp_query-to>1396</Hsp_query-to>\n+ <Hsp_hit-from>16200</Hsp_hit-from>\n+ <Hsp_hit-to>18659</Hsp_hit-to>\n+ <Hsp_query-frame>0</Hsp_query-frame>\n+ <Hsp_hit-frame>-1</Hsp_hit-frame>\n+ <Hsp_identity>540</Hsp_identity>\n+ 
<Hsp_positive>627</Hsp_positive>\n+ <Hsp_gaps>39</Hsp_gaps>\n+ <Hsp_align-len>840</Hsp_align-len>\n+ <Hsp_qseq>KPYICKAPGCTKRYTDPSSLRKHVKTVHGAEFYANKKHKGLPLNDANSRLQQNNS--RHNLQEHNIDSSPCSEDSHLGKMLGTXXXXXXXXXXXXXXNHHLVNGVRASDSLLTYSPDDLAEXXXXXXXXXXXXXXXXXXXXXXXRAMVNIGNGNASASTIGGSVLARQRFRGRLQTKGINSSTIMLCNIPESNRTFGISELNQRITELKMEPGTDAEIKIPKLPNTTIGGYTEDPLQNQTSFRNTVSNKQG--TVSGSIQGQFRRDSQNSTASTYYGXXXXXXXXXXXXXXXIPTMRPNPSCN-STASFYDPISPGCSRRSSQMSNGANCNSFTSTSGLPVLNKE--SNKSLNACINKPNIGVQGVGIYNXXXXXXXXXHLIATNLKRLQRKDSE--YHNFTSGRFSVPSYMHSLHIKNNKPVGENEFDKAIASNA-RRQTDPVPNINLDPLTNISRFSTTPHSFDINVGKTN--NIASSINKDNLRKDLFTVSIKADMAMTSDQHPNERINLDEVEELILPDEMLQYLNLVKDDTNHLEKEHQAVPVGSNVSETIASNH--YREQSNIYYTNKQILTPPSNVDI----QPNTTKFTVQDKFAMTAVGGSFSQRELSTLAVPNEHGHAKCESFHHQSQKYMNTDIGSKQQSALPSAHQRQTEKSNYNQIIDSSMTSLPELNVDSIYPRNETENIFKVHGDHDNEIQCGIISQSQMSPSTNLNNDGQFSTVNMQPITTSKLF-PPEPQKIVCDTQASNTSVMHLDTYQRTLEYVQSCQNWMETNNTSTNQIQSLPGMPVNNTLFPDVSSSTHPYHGTNMVINDMTTSLTSLLEENRYLQMM</Hsp_qseq>\n+ <Hsp_hseq>KPYICKAPGCTKRYTDPSSLRKHVKTVHGAEFYANKKHKGLPLNDANSRLHRDSGQGRHNLQEHNIDSSPCSEELQVGKLIGMSSPSIKSESDESSPHHQLLSGVRASDCFLTYSQDGAAEHITLDDGWDCDDDVDVADLPIVLRAMVNVGSGNSMAPTIGGGVVARQRFRSRLQTKGINSSMVMLGNIPESNRTIGISELNQRITELKMEPGTACDITVPL--NTGLERISEDLSQNQS---NITLNKQSFFTASGSLQGHFRRDSQNSTASTYYGSMQSRRSSQSSQVSSISTMRPGPSYNTTTASLYDPISPGCSRRSSQMSNVVNSYALTSTSGLTAINKDLNANSSPNASINKPGLGGQYFGFYNNSLPPPPSSHLIATNLKHLQDTDSRSCYHNTTGGRFSIPNCTPSLHLDYNGPAGEQEIDKEIPNNILRRQSEPMPNISLDTLTNVSPLSGPLQNLQFPIGKARNVNITSSSNENTLRKGPCHATMKTEMTMTSEQHPNERINLDEVEELILPDEMLQYLNLVKDDQNYMEKDD--VAIRSTVPKTIKSNENLLLSKSNLNPIKKQIILPTSNFDVSINLQPNTSNLQTQEEHTMTTIGGLPSQREQN--IVPHQHEKTKCRSFPQEIDKTINIDIGFKEQPYPSSAYQPQITKSNQNEIIDSSMTSLPELN--PIFTKINSENVSKLHRDQNSEIQCGIVSQSQMSPSININNDGETSTLKNLPLTYSKFSGQPNTQTTVG---GSNTSSMVSDTYQRTLEYVQSCQNWVDTNNSSGDQIQS------NNTLWSDVSSSTHPYAGTNLVINDMTTSLTSLLEENRYLHMM</Hsp_hseq>\n+ <Hsp_midline>KPYICKAPGCTKRYTDPSSLRKHVKTVHGAEFYANKKHKGLPLNDANSRL +++ R'..b' <Hsp_midline>H +K C +GC K + ++ HL +H G + + C C KAF +S +HQ H+ EKP+ C GC KR++ 
+LR</Hsp_midline>\n+ </Hsp>\n+ <Hsp>\n+ <Hsp_num>2</Hsp_num>\n+ <Hsp_bit-score>47.3654</Hsp_bit-score>\n+ <Hsp_score>111</Hsp_score>\n+ <Hsp_evalue>6.65795e-06</Hsp_evalue>\n+ <Hsp_query-from>362</Hsp_query-from>\n+ <Hsp_query-to>441</Hsp_query-to>\n+ <Hsp_hit-from>35535</Hsp_hit-from>\n+ <Hsp_hit-to>35756</Hsp_hit-to>\n+ <Hsp_query-frame>0</Hsp_query-frame>\n+ <Hsp_hit-frame>-1</Hsp_hit-frame>\n+ <Hsp_identity>29</Hsp_identity>\n+ <Hsp_positive>37</Hsp_positive>\n+ <Hsp_gaps>6</Hsp_gaps>\n+ <Hsp_align-len>80</Hsp_align-len>\n+ <Hsp_qseq>NKKAFVCRWEDCTRGEKPFKAQYMLVVHMRRHTGEKPHKCTFEGCFKAYSRLENLKTHLRSHTGEKPYTCEYPGCSKAFS</Hsp_qseq>\n+ <Hsp_hseq>NDKKIACPHKGC---HKNFRDSSAMRKHLHTH-GPRVHVCA--ECGKAFVESSKLKRHQLVHTGEKPFQCTFEGCGKRFS</Hsp_hseq>\n+ <Hsp_midline>N K C + C K F+ + H+ H G + H C C KA+ LK H HTGEKP+ C + GC K FS</Hsp_midline>\n+ </Hsp>\n+ <Hsp>\n+ <Hsp_num>3</Hsp_num>\n+ <Hsp_bit-score>41.5874</Hsp_bit-score>\n+ <Hsp_score>96</Hsp_score>\n+ <Hsp_evalue>0.000398301</Hsp_evalue>\n+ <Hsp_query-from>384</Hsp_query-from>\n+ <Hsp_query-to>427</Hsp_query-to>\n+ <Hsp_hit-from>33181</Hsp_hit-from>\n+ <Hsp_hit-to>33312</Hsp_hit-to>\n+ <Hsp_query-frame>0</Hsp_query-frame>\n+ <Hsp_hit-frame>-3</Hsp_hit-frame>\n+ <Hsp_identity>16</Hsp_identity>\n+ <Hsp_positive>28</Hsp_positive>\n+ <Hsp_gaps>0</Hsp_gaps>\n+ <Hsp_align-len>44</Hsp_align-len>\n+ <Hsp_qseq>YMLVVHMRRHTGEKPHKCTFEGCFKAYSRLENLKTHLRSHTGEK</Hsp_qseq>\n+ <Hsp_hseq>FIYRTHVRIHTGDRPFVCPFDACNKKFAQSTNLKSHILTHAKAK</Hsp_hseq>\n+ <Hsp_midline>++ H+R HTG++P C F+ C K +++ NLK+H+ +H K</Hsp_midline>\n+ </Hsp>\n+ <Hsp>\n+ <Hsp_num>4</Hsp_num>\n+ <Hsp_bit-score>40.817</Hsp_bit-score>\n+ <Hsp_score>94</Hsp_score>\n+ <Hsp_evalue>0.000632218</Hsp_evalue>\n+ <Hsp_query-from>369</Hsp_query-from>\n+ <Hsp_query-to>431</Hsp_query-to>\n+ <Hsp_hit-from>35475</Hsp_hit-from>\n+ <Hsp_hit-to>35663</Hsp_hit-to>\n+ <Hsp_query-frame>0</Hsp_query-frame>\n+ <Hsp_hit-frame>-1</Hsp_hit-frame>\n+ <Hsp_identity>23</Hsp_identity>\n+ 
<Hsp_positive>28</Hsp_positive>\n+ <Hsp_gaps>0</Hsp_gaps>\n+ <Hsp_align-len>63</Hsp_align-len>\n+ <Hsp_qseq>RWEDCTRGEKPFKAQYMLVVHMRRHTGEKPHKCTFEGCFKAYSRLENLKTHLRSHTGEKPYTC</Hsp_qseq>\n+ <Hsp_hseq>RVHVCAECGKAFVESSKLKRHQLVHTGEKPFQCTFEGCGKRFSLDFNLRYSTKKFWFSYKFVC</Hsp_hseq>\n+ <Hsp_midline>R C K F L H HTGEKP +CTFEGC K +S NL+ + + C</Hsp_midline>\n+ </Hsp>\n+ <Hsp>\n+ <Hsp_num>5</Hsp_num>\n+ <Hsp_bit-score>37.7354</Hsp_bit-score>\n+ <Hsp_score>86</Hsp_score>\n+ <Hsp_evalue>0.00636923</Hsp_evalue>\n+ <Hsp_query-from>417</Hsp_query-from>\n+ <Hsp_query-to>449</Hsp_query-to>\n+ <Hsp_hit-from>33205</Hsp_hit-from>\n+ <Hsp_hit-to>33303</Hsp_hit-to>\n+ <Hsp_query-frame>0</Hsp_query-frame>\n+ <Hsp_hit-frame>-3</Hsp_hit-frame>\n+ <Hsp_identity>12</Hsp_identity>\n+ <Hsp_positive>23</Hsp_positive>\n+ <Hsp_gaps>0</Hsp_gaps>\n+ <Hsp_align-len>33</Hsp_align-len>\n+ <Hsp_qseq>KTHLRSHTGEKPYTCEYPGCSKAFSNASDRAKH</Hsp_qseq>\n+ <Hsp_hseq>RTHVRIHTGDRPFVCPFDACNKKFAQSTNLKSH</Hsp_hseq>\n+ <Hsp_midline>+TH+R HTG++P+ C + C+K F+ +++ H</Hsp_midline>\n+ </Hsp>\n+ </Hit_hsps>\n+</Hit>\n+</Iteration_hits>\n+ <Iteration_stat>\n+ <Statistics>\n+ <Statistics_db-num>70</Statistics_db-num>\n+ <Statistics_db-len>3333194</Statistics_db-len>\n+ <Statistics_hsp-len>96</Statistics_hsp-len>\n+ <Statistics_eff-space>1306438952</Statistics_eff-space>\n+ <Statistics_kappa>0.041</Statistics_kappa>\n+ <Statistics_lambda>0.267</Statistics_lambda>\n+ <Statistics_entropy>0.14</Statistics_entropy>\n+ </Statistics>\n+ </Iteration_stat>\n+</Iteration>\n+</BlastOutput_iterations>\n+</BlastOutput>\n+\n' |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/tblastN/workflowInputs/tblastn_Dbia3_ci.xml.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tblastN/workflowInputs/tblastn_Dbia3_ci.xml.bed Wed Jul 13 13:36:37 2016 -0400 |
b |
@@ -0,0 +1,50 @@ +contig2 16199 18659 ci-PA 358 - 16199 18659 0 18 126,114,27,174,186,84,69,45,204,108,117,144,123,150,24,48,495,165, 0,126,240,270,444,630,726,801,846,1056,1167,1290,1440,1566,1722,1746,1794,2295, +contig2 18939 20598 ci-PA 604 - 18939 20598 0 7 510,210,432,84,63,45,174, 0,564,774,1278,1368,1434,1485, +contig2 18712 18877 ci-PA 964 - 18712 18877 0 1 165, 0, +contig1 31366 33826 ci-PA 358 - 31366 33826 0 18 126,114,27,174,186,84,69,45,204,108,117,144,123,150,24,48,495,165, 0,126,240,270,444,630,726,801,846,1056,1167,1290,1440,1566,1722,1746,1794,2295, +contig1 34106 35765 ci-PA 604 - 34106 35765 0 7 510,210,432,84,63,45,174, 0,564,774,1278,1368,1434,1485, +contig1 33879 34044 ci-PA 964 - 33879 34044 0 1 165, 0, +contig24 12402 12561 ci-PA 0 - 12402 12561 0 2 63,96, 0,63, +contig24 12402 12570 ci-PA 0 - 12402 12570 0 4 21,42,90,15, 0,21,63,153, +contig24 3846 4032 ci-PA 0 - 3846 4032 0 3 51,21,96, 0,51,90, +contig23 32405 32564 ci-PA 0 - 32405 32564 0 2 63,96, 0,63, +contig23 32405 32573 ci-PA 0 - 32405 32573 0 4 21,42,90,15, 0,21,63,153, +contig23 23849 24035 ci-PA 0 - 23849 24035 0 3 51,21,96, 0,51,90, +contig67 20513 20759 ci-PA 0 - 20513 20759 0 4 81,51,21,93, 0,81,132,153, +contig67 20531 20753 ci-PA 0 - 20531 20753 0 4 111,24,51,36, 0,111,135,186, +contig67 18177 18309 ci-PA 0 - 18177 18309 0 1 132, 0, +contig67 20471 20660 ci-PA 0 - 20471 20660 0 1 189, 0, +contig67 18201 18300 ci-PA 0 - 18201 18300 0 1 99, 0, +contig66 35516 35762 ci-PA 0 - 35516 35762 0 4 81,51,21,93, 0,81,132,153, +contig66 35534 35756 ci-PA 0 - 35534 35756 0 4 111,24,51,36, 0,111,135,186, +contig66 33180 33312 ci-PA 0 - 33180 33312 0 1 132, 0, +contig66 35474 35663 ci-PA 0 - 35474 35663 0 1 189, 0, +contig66 33204 33303 ci-PA 0 - 33204 33303 0 1 99, 0, +contig2 18711 20598 ci-PC 584 - 18711 20598 0 8 99,510,210,432,84,63,48,171, 0,228,792,1002,1506,1596,1662,1716, +contig2 18601 18889 ci-PC 0 - 18601 18889 0 5 63,36,51,54,45, 0,75,123,180,243, +contig1 33878 35765 ci-PC 
584 - 33878 35765 0 8 99,510,210,432,84,63,48,171, 0,228,792,1002,1506,1596,1662,1716, +contig1 33768 34056 ci-PC 0 - 33768 34056 0 5 63,36,51,54,45, 0,75,123,180,243, +contig24 12468 12561 ci-PC 0 - 12468 12561 0 1 93, 0, +contig23 32471 32564 ci-PC 0 - 32471 32564 0 1 93, 0, +contig2 16199 18659 ci-PB 358 - 16199 18659 0 18 126,114,27,174,186,84,69,45,204,108,117,144,123,150,24,48,495,165, 0,126,240,270,444,630,726,801,846,1056,1167,1290,1440,1566,1722,1746,1794,2295, +contig2 18939 20268 ci-PB 560 - 18939 20268 0 4 510,210,432,51, 0,564,774,1278, +contig2 18712 18877 ci-PB 964 - 18712 18877 0 1 165, 0, +contig1 31366 33826 ci-PB 358 - 31366 33826 0 18 126,114,27,174,186,84,69,45,204,108,117,144,123,150,24,48,495,165, 0,126,240,270,444,630,726,801,846,1056,1167,1290,1440,1566,1722,1746,1794,2295, +contig1 34106 35435 ci-PB 560 - 34106 35435 0 4 510,210,432,51, 0,564,774,1278, +contig1 33879 34044 ci-PB 964 - 33879 34044 0 1 165, 0, +contig24 12402 12561 ci-PB 0 - 12402 12561 0 2 63,96, 0,63, +contig24 12402 12570 ci-PB 0 - 12402 12570 0 4 21,42,90,15, 0,21,63,153, +contig24 3846 4032 ci-PB 0 - 3846 4032 0 3 51,21,96, 0,51,90, +contig23 32405 32564 ci-PB 0 - 32405 32564 0 2 63,96, 0,63, +contig23 32405 32573 ci-PB 0 - 32405 32573 0 4 21,42,90,15, 0,21,63,153, +contig23 23849 24035 ci-PB 0 - 23849 24035 0 3 51,21,96, 0,51,90, +contig67 20513 20759 ci-PB 0 - 20513 20759 0 4 81,51,21,93, 0,81,132,153, +contig67 20531 20753 ci-PB 0 - 20531 20753 0 4 111,24,51,36, 0,111,135,186, +contig67 18177 18309 ci-PB 0 - 18177 18309 0 1 132, 0, +contig67 20471 20660 ci-PB 0 - 20471 20660 0 1 189, 0, +contig67 18201 18300 ci-PB 0 - 18201 18300 0 1 99, 0, +contig66 35516 35762 ci-PB 0 - 35516 35762 0 4 81,51,21,93, 0,81,132,153, +contig66 35534 35756 ci-PB 0 - 35534 35756 0 4 111,24,51,36, 0,111,135,186, +contig66 33180 33312 ci-PB 0 - 33180 33312 0 1 132, 0, +contig66 35474 35663 ci-PB 0 - 35474 35663 0 1 189, 0, +contig66 33204 33303 ci-PB 0 - 33204 33303 0 1 99, 0, |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/tblastN/workflowInputs/tblastn_Dbia3_ci.xml.psl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tblastN/workflowInputs/tblastn_Dbia3_ci.xml.psl Wed Jul 13 13:36:37 2016 -0400 |
b |
@@ -0,0 +1,50 @@ +1458 762 183 0 7 60 10 57 +- ci-PA 4191 1725 4188 contig2 45017 16199 18659 18 165,495,48,24,150,123,144,117,108,204,45,69,84,186,174,27,114,126, 1725,1890,2391,2448,2472,2622,2745,2889,3006,3114,3324,3369,3438,3528,3720,3894,3930,4062, 26358,26529,27024,27072,27102,27255,27384,27534,27654,27768,27972,28023,28104,28188,28374,28551,28578,28692, +987 300 231 0 1 3 5 141 +- ci-PA 4191 39 1560 contig2 45017 18939 20598 7 174,45,63,84,432,210,510, 39,213,258,321,405,840,1050, 24419,24599,24647,24716,24872,25304,25568, +162 3 0 0 0 0 0 0 +- ci-PA 4191 1560 1725 contig2 45017 18712 18877 1 165, 1560, 26140, +1458 762 183 0 7 60 10 57 +- ci-PA 4191 1725 4188 contig1 45179 31366 33826 18 165,495,48,24,150,123,144,117,108,204,45,69,84,186,174,27,114,126, 1725,1890,2391,2448,2472,2622,2745,2889,3006,3114,3324,3369,3438,3528,3720,3894,3930,4062, 11353,11524,12019,12067,12097,12250,12379,12529,12649,12763,12967,13018,13099,13183,13369,13546,13573,13687, +987 300 231 0 1 3 5 141 +- ci-PA 4191 39 1560 contig1 45179 34106 35765 7 174,45,63,84,432,210,510, 39,213,258,321,405,840,1050, 9414,9594,9642,9711,9867,10299,10563, +162 3 0 0 0 0 0 0 +- ci-PA 4191 1560 1725 contig1 45179 33879 34044 1 165, 1560, 11135, +69 90 0 0 1 6 0 0 +- ci-PA 4191 1467 1632 contig24 40010 12402 12561 2 96,63, 1467,1569, 27449,27545, +78 90 0 0 3 15 0 0 +- ci-PA 4191 1542 1725 contig24 40010 12402 12570 4 15,90,42,21, 1542,1563,1659,1704, 27440,27455,27545,27587, +63 105 0 0 1 6 1 18 +- ci-PA 4191 1536 1710 contig24 40010 3846 4032 3 96,21,51, 1536,1632,1659, 35978,36092,36113, +69 90 0 0 1 6 0 0 +- ci-PA 4191 1467 1632 contig23 50012 32405 32564 2 96,63, 1467,1569, 17448,17544, +78 90 0 0 3 15 0 0 +- ci-PA 4191 1542 1725 contig23 50012 32405 32573 4 15,90,42,21, 1542,1563,1659,1704, 17439,17454,17544,17586, +63 105 0 0 1 6 1 18 +- ci-PA 4191 1536 1710 contig23 50012 23849 24035 3 96,21,51, 1536,1632,1659, 25977,26091,26112, +87 159 0 0 3 12 0 0 +- ci-PA 4191 1530 1788 contig67 44531 
20513 20759 4 93,21,51,81, 1530,1626,1653,1707, 23772,23865,23886,23937, +87 135 0 0 3 18 0 0 +- ci-PA 4191 1437 1677 contig67 44531 20531 20753 4 36,51,24,111, 1437,1482,1536,1566, 23778,23814,23865,23889, +48 84 0 0 0 0 0 0 +- ci-PA 4191 1503 1635 contig67 44531 18177 18309 1 132, 1503, 26222, +69 120 0 0 0 0 0 0 +- ci-PA 4191 1458 1647 contig67 44531 20471 20660 1 189, 1458, 23871, +36 63 0 0 0 0 0 0 +- ci-PA 4191 1602 1701 contig67 44531 18201 18300 1 99, 1602, 26231, +87 159 0 0 3 12 0 0 +- ci-PA 4191 1530 1788 contig66 45011 35516 35762 4 93,21,51,81, 1530,1626,1653,1707, 9249,9342,9363,9414, +87 135 0 0 3 18 0 0 +- ci-PA 4191 1437 1677 contig66 45011 35534 35756 4 36,51,24,111, 1437,1482,1536,1566, 9255,9291,9342,9366, +48 84 0 0 0 0 0 0 +- ci-PA 4191 1503 1635 contig66 45011 33180 33312 1 132, 1503, 11699, +69 120 0 0 0 0 0 0 +- ci-PA 4191 1458 1647 contig66 45011 35474 35663 1 189, 1458, 9348, +36 63 0 0 0 0 0 0 +- ci-PA 4191 1602 1701 contig66 45011 33204 33303 1 99, 1602, 11708, +1050 336 231 0 1 3 6 270 +- ci-PC 1716 39 1659 contig2 45017 18711 20598 8 171,48,63,84,432,210,510,99, 39,210,258,321,405,840,1050,1560, 24419,24596,24647,24716,24872,25304,25568,26207, +111 138 0 0 0 0 4 39 +- ci-PC 1716 1467 1716 contig2 45017 18601 18889 5 45,54,51,36,63, 1467,1512,1566,1617,1653, 26128,26182,26242,26305,26353, +1050 336 231 0 1 3 6 270 +- ci-PC 1716 39 1659 contig1 45179 33878 35765 8 171,48,63,84,432,210,510,99, 39,210,258,321,405,840,1050,1560, 9414,9591,9642,9711,9867,10299,10563,11202, +111 138 0 0 0 0 4 39 +- ci-PC 1716 1467 1716 contig1 45179 33768 34056 5 45,54,51,36,63, 1467,1512,1566,1617,1653, 11123,11177,11237,11300,11348, +45 48 0 0 0 0 0 0 +- ci-PC 1716 1467 1560 contig24 40010 12468 12561 1 93, 1467, 27449, +45 48 0 0 0 0 0 0 +- ci-PC 1716 1467 1560 contig23 50012 32471 32564 1 93, 1467, 17448, +1458 762 183 0 7 60 10 57 +- ci-PB 3837 1371 3834 contig2 45017 16199 18659 18 165,495,48,24,150,123,144,117,108,204,45,69,84,186,174,27,114,126, 
1371,1536,2037,2094,2118,2268,2391,2535,2652,2760,2970,3015,3084,3174,3366,3540,3576,3708, 26358,26529,27024,27072,27102,27255,27384,27534,27654,27768,27972,28023,28104,28188,28374,28551,28578,28692, +834 264 105 0 1 3 2 126 +- ci-PB 3837 0 1206 contig2 45017 18939 20268 4 51,432,210,510, 0,51,486,696, 24749,24872,25304,25568, +162 3 0 0 0 0 0 0 +- ci-PB 3837 1206 1371 contig2 45017 18712 18877 1 165, 1206, 26140, +1458 762 183 0 7 60 10 57 +- ci-PB 3837 1371 3834 contig1 45179 31366 33826 18 165,495,48,24,150,123,144,117,108,204,45,69,84,186,174,27,114,126, 1371,1536,2037,2094,2118,2268,2391,2535,2652,2760,2970,3015,3084,3174,3366,3540,3576,3708, 11353,11524,12019,12067,12097,12250,12379,12529,12649,12763,12967,13018,13099,13183,13369,13546,13573,13687, +834 264 105 0 1 3 2 126 +- ci-PB 3837 0 1206 contig1 45179 34106 35435 4 51,432,210,510, 0,51,486,696, 9744,9867,10299,10563, +162 3 0 0 0 0 0 0 +- ci-PB 3837 1206 1371 contig1 45179 33879 34044 1 165, 1206, 11135, +69 90 0 0 1 6 0 0 +- ci-PB 3837 1113 1278 contig24 40010 12402 12561 2 96,63, 1113,1215, 27449,27545, +78 90 0 0 3 15 0 0 +- ci-PB 3837 1188 1371 contig24 40010 12402 12570 4 15,90,42,21, 1188,1209,1305,1350, 27440,27455,27545,27587, +63 105 0 0 1 6 1 18 +- ci-PB 3837 1182 1356 contig24 40010 3846 4032 3 96,21,51, 1182,1278,1305, 35978,36092,36113, +69 90 0 0 1 6 0 0 +- ci-PB 3837 1113 1278 contig23 50012 32405 32564 2 96,63, 1113,1215, 17448,17544, +78 90 0 0 3 15 0 0 +- ci-PB 3837 1188 1371 contig23 50012 32405 32573 4 15,90,42,21, 1188,1209,1305,1350, 17439,17454,17544,17586, +63 105 0 0 1 6 1 18 +- ci-PB 3837 1182 1356 contig23 50012 23849 24035 3 96,21,51, 1182,1278,1305, 25977,26091,26112, +87 159 0 0 3 12 0 0 +- ci-PB 3837 1176 1434 contig67 44531 20513 20759 4 93,21,51,81, 1176,1272,1299,1353, 23772,23865,23886,23937, +87 135 0 0 3 18 0 0 +- ci-PB 3837 1083 1323 contig67 44531 20531 20753 4 36,51,24,111, 1083,1128,1182,1212, 23778,23814,23865,23889, +48 84 0 0 0 0 0 0 +- ci-PB 3837 1149 1281 
contig67 44531 18177 18309 1 132, 1149, 26222, +69 120 0 0 0 0 0 0 +- ci-PB 3837 1104 1293 contig67 44531 20471 20660 1 189, 1104, 23871, +36 63 0 0 0 0 0 0 +- ci-PB 3837 1248 1347 contig67 44531 18201 18300 1 99, 1248, 26231, +87 159 0 0 3 12 0 0 +- ci-PB 3837 1176 1434 contig66 45011 35516 35762 4 93,21,51,81, 1176,1272,1299,1353, 9249,9342,9363,9414, +87 135 0 0 3 18 0 0 +- ci-PB 3837 1083 1323 contig66 45011 35534 35756 4 36,51,24,111, 1083,1128,1182,1212, 9255,9291,9342,9366, +48 84 0 0 0 0 0 0 +- ci-PB 3837 1149 1281 contig66 45011 33180 33312 1 132, 1149, 11699, +69 120 0 0 0 0 0 0 +- ci-PB 3837 1104 1293 contig66 45011 35474 35663 1 189, 1104, 9348, +36 63 0 0 0 0 0 0 +- ci-PB 3837 1248 1347 contig66 45011 33204 33303 1 99, 1248, 11708, |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/trfBig/hubaInputs/dbia3.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/trfBig/hubaInputs/dbia3.fa Wed Jul 13 13:36:37 2016 -0400 |
b |
b'@@ -0,0 +1,66784 @@\n+>contig1\n+AAAACTAATTTTATCAAAATCGGACAACTATATCATATAGCTGCCATACG\n+AACGATCGGAAAATTGGTAAGTAAATAATTAAAAATATTATATCTTTGGT\n+GTGTTTTTGGAAATAACATTTTTTAATATGTTTTGAATTTTGAATTAAAT\n+TCTATCAAAATCGGACGACTATATCATATAGCTGCCATACAAACGATCGG\n+AAAATTGGTGAATAAATAATATGAAAAAATTATATCTTTGGTTATTTTTT\n+AGACAAATAACCTCCAACGCTTGGAAATAACATTTTTTAATTAGTTATAA\n+ATTTGATATTTTAATTTTATCAAAATCGGACGAATATAGCATATAGCTGT\n+TAAAATAATATGAAACAAATTATAGCTCCGGTGTTTTTTACATATTATCT\n+TATACTATTGGGAAAATAGTTTATTATATTTTAAAGAATTTCCAATTAAA\n+CTCTAACATATAGCTTTCAAAGAAACGGTCAAAAAAGTAAAGAAATCATT\n+TTTTTTTAACATCACTGAAGCTAGAAACAATCCTTAAAAATGTAACATGG\n+TGTTAGTAGCATTGAAAATTGCTTATAACTGCAAAGGGAAAACAAACATC\n+GGCTTGCCGAATGTAATTTCCATTCTTGTTTGACTTGAGTTTATAACTTA\n+CAATTATGGTAAGGTGCCTGATTTTGGTTTTTGCCATACGGTATGTGGTA\n+GTTATCATTTTGCCGTGCTATAAGTGACATCACAGCCGATTCTTTTACTC\n+CATAATGGGCCAATGTGTTGAGGCGTTTCCATCCATTTATTGTTTTTGTA\n+GTTAGGTCTTCATCTTGTAATGTGAGATGACCTCCTCTTCCATGCCTCCA\n+TTCTAAGTCAACTTCGTGCACAGATGGTCTCATAGAAAACGGAGTATTTT\n+TAAAAATTGCGTCCAAAATTTTTAATTTAACTTGCGATATGGTATCCAAA\n+TCGTTTACACGACATTGGACTTTTTCATCAAGATCATCTTGTAAAATGTG\n+CAAAATTACAACAGAATGGGTAACTTGTTCGTGGAGAAGTCGTTCCTCTG\n+AAAGTGAATAACGAGCATCGTGGGTTATTGCGTCCACCAAACCCTTTTCA\n+ATTTGATGCTTAATTGCCTTGAACAGCAAAAATAAATTAGATCCAGCATA\n+TTCTTTTAGGTAGTCGTACATACAAATTGCTAAGTAGTTTGTTAACATTT\n+TTTCAACTACGCTCTCAGTGCGTCGTAGCATTAGCTGAGGATGCTTGCTG\n+GCGAGCGATTTGTCAATTAATCGCAATAAAAGGGACTTTAAAATTTCCGT\n+CGCATATTCCATTTTGTTCATTAGAACAACCATAAGTAAAGAGGCGACGT\n+TAACTCGATCGCGAATTGAGAAAGATGACCGTTGAGCTTCTAAAGTTTCT\n+ATGAACAATAGTAAAAAATATTTGTTTCCAATAAGTTGCTCGAATTGTAT\n+CATAGCTGCATCATAGTTAGTGTGCGGGCTACTTCCACAAAATTTTCGGG\n+AGTTTAGAATAGGATGATCTGATACACCGGGAAAGAAAACTTTCATAATG\n+TAATTGACGTGATCTAACGTTGGTATACCGGTGCTCTCCAAATCTGCTGT\n+TAGATCGGTCATGTCCGTTTGGAGCTCAGCAAATGCCTGTTTACACTCAG\n+AACGAACGTTGCTTTCCAATGTTATCATCTGTATCTGAATTCGTTTGTAT\n+TCCCGTTCTGCTTGGGTCGATTTCCTTCTAAATATTATTAGTACGACAAC\n+CAAAACGATGACAAGCACTGCAACTGTCAATATAACGACAAACATGGCAT\n+GTGAAAAAACATAGGGTTTATTTAAATCATATTTCAAATATCCTATGGCG\n+AAAC
GAAGATTTCGCCCTACTTTAACCACAACTAGAGGTAAATCTGTCGA\n+TTGATCCACACCATTTTCATCAGTTGGGAGTGGTTGATGTTCCGGTGGAA\n+TGCACAAAAGTTGAGTTAGTGTAAGGCTTGTTATATTGCATTGAGTAGTA\n+CCAATGGTTACATTAACGTCGTATTCATCAGCTGCCAAATTTAGTAGCTC\n+GCCTTCAATAACCAAGCTGTCACCCTTGTATAGTTTAATTCCGTCATTTG\n+GAAATGGTAAATATTTTGGATCGGCTAAATAAACAATAGTGCTCCTGATA\n+TCATGAAAGTATTTATTTAGATCTCGAACAAGCTGTACATTGTCCATTAC\n+AAAACTAAGTTGTAAATTCAGTTGCGTTTCATGGACCTTTACAAAAGTTG\n+TAACATCCATATTATTATTGACGAAATAGCTAGGCGCAGTTCCCCCTGTT\n+GTGTATATATGAAAGTTGTCTACGAAGTCTGCTTTTCGCTTTCGTCTAGA\n+ATATTCGTTCTGATGCGCACTTTCATTTTTAAAGTCAAATTCCGAATCCA\n+TCACTCTGTTAGAACTTTTAAATGCTTGAAATTTATCATTTACCGGAGGA\n+GATGGGCATTCCATTTGATTTGAATTGATTACTACGCAAGATGTTTTGTT\n+TACTCTTTCATTATCAAAAAAAACTTCAAGTTCAGGCTTTTGAATTGAAT\n+TAAGATACATTCCATGAACGGTCAGAACGCGACCACCACTTACAAAACTG\n+CGTAAAGGCTTAATCTGCATTATACGCGGGTCTTGGGTATAGTTAAAAAT\n+AGAACAGGGCTGCCTTGGTAACGTACGAAATTGGTATGAACCGAAATTAC\n+TTCGAGTTAGATTTTTATTGGGTATAGTTGCGGGTGAGATATGACATTCT\n+AAAGTTCGGTTGGCACCGTCAATAAGAAGGTGTAGAGACCGTATTGGTTC\n+TGGCTGGGTCGCCTCTGATGTAGTACTACTAACTTGAGTTGAAGAAGCTT\n+GTGTAACGTTTATATGACACTCGTACTCATCTAAATATGCTCGCATGGTT\n+GATCCAATGTTCAAAAACTTTCCTATTAACGACAATTGTGTTCCTCCCGA\n+CCTGGGCCCAATCGTGGGATATAATCCTGTTAGCAAAACATTCTTAAAGT\n+GAAATTGTACACTAGATTCAGTATACCCTGCATCATTTGCAACCTTTATA\n+GGCGCTGACATTTCATACATCACTGCTCCAGTGCGGCATTCGATCTTTAC\n+AGAAATTTGGTAGTTGACTAGTTCACAAGGCACAGACCCAATAAATATTT\n+TTCCACGCACATCTCCTTCGCGAATGCCCAAATTACTGCCTTCAATTGTA\n+ATAAGGGTACCACCCTCAATAGGTCCAGATAACGGTTTAATAATATCGAT\n+CCTTGGTAAAGGGCACTCGTTTTCTTTAGTTGTTTTTGATCCTGCACTAA\n+TAGCACGATCACTGTCTGCTATGCAAGTCTCGTTATATACACATGAGTTG\n+CTGCACCAAGCACATTGGTATTTTGGATCACGAGTGACACACAAACTGCA\n+ATCAGCATGATCCCGATGTGAGCCCAGTACGTCGCATTTATATAATGTAA\n+CAATCGCCGTGTCCACATAGTGCTGGAAATTCCATGTAATTACAACCTTT\n+GCTTGGTATTCATGTGTATTAGTCTCGTAGAAATAAGGTGTTTTTTCACA\n+AACAACAATCTTGTTTGACTCAATGTGGGCAGGCAATAGCATCTGAGCAG\n+CTTCAATATGAACTGTACACAAGAATCCAGCGTGAGCGCTTTTGGGTTTT\n+GGTAAGTTTTCTATCTCTAAACGAATTTCTTTTGGCACCCGCACCGGTAA\n+AAGAATCGCCGGACGATTGCTTTTTAAATGGGGGCATTGACCA
ACACTAG\n+TTACCGAATTTTCTATATTACGACATTGTATTGATTGATGGACACATTTA\n+TTGTCAAAAATACACC'..b'CCACTGGACCCGTTGGATAGCGTC\n+GGCACTTTTCTGTTTGGTGCCATTCACTGGTCACTAGTAGGACCGCTCTG\n+CCTATTAAAATAGGAGTGCTGCTTAGAATTTATTTCAGCAAGACAGCCGT\n+AAGATATATATGAATAGTATTTAACGGCAGTGGCAGAGGCCTTTACAATA\n+TAAGGCTTTACGGGATTTTAAACAATGAAAAACAAAAATATTTTTATAGA\n+GTCGAGATAAAATACTTTTTACAATAAATACTAAGTATTTTTAAAAGATA\n+AAAATATTTAAAAGTAAATACTGAATATTTTAACAGGTGTTAAGTACTGG\n+GAACTGTGACTAAGGACGGCGGTATATCGCTACCATAATATTTTTAAAGG\n+AACTGAATACTGAAAGATGTGACCAAAATGGTAGAATATCGCTAACGAAA\n+ATCACAGACGCAAATATCGATAGTGGCCTAGAGACCCGATATCGCCGCCG\n+TGATTATCGATGAAACTACAGCTGAGCTGCTTGTTTACATTCTAAAGTTA\n+AAATTTTTAAAAATTTCCAATACAAAAAAAAATTCTTCGTATTAACACGG\n+CTGTTTGCCGGTAAGATCTGAAGTGGATTCAAGTTGCAGCACCCTCAGGA\n+AAAGAATAAATTATTATAATATAATAATATAATATAAAGAATTTAAATTC\n+GAACCAAACGCGGTATGTCACTCTTCATTTAGAGAATTTGTGGCTTTGTT\n+TTGTACAAAAAACTATTCTTTTTTCTTGAAAATTAGCATCAAGAATTTAT\n+TGCCAAAACAGAACGCAATTCTGCTAAATCTGGTTACCCTGCCAAATGGC\n+TGGCCGTTCTGTGCATTCAATAACACAAGCTAACGGCTTACGGCACATTC\n+GGAAAGATCCAAAAGAATTTCTATTCCGGCATTTTTTATGTATTATCGTC\n+CAATTTTTGTATGTGTATTCATTCTTTTGGCTGTCCGTTATTAAGCTTTT\n+CTTATCCACATAAGCTGACCAGCTGCCAAAAAGCCGACCGTTTTGTGCAG\n+CCAATAACCCAAGCTAACTATCAACAGCAATTTCGAAAAATCCAGAAGAA\n+GCTATCCTTCGGCAATTTTTTCTTATATTATAATTTTTTTGTCACAATTT\n+TTTGTCAAAAAATCGACAATAATTAAGTGTTTACATTTTAATGCGATTTA\n+ATTGGAAATTTGAATTCGAATCAAACGCGGTGTCACTCTACATATGGACA\n+ATTTGTGGCTACAAAATTAGTTGAAAGTGCATGTTTTTTTTTAAGTGAAA\n+TAAAACAAAAACAGCAATAACAGGCCAAAATGTTGTTTTATAAATAAATA\n+CTTACATATCCGCATATAAATATGAATGAAAAAAAAAACAGCAAGACTGG\n+GCCAAAATATTGTTCTTTAATATGTAAATAAATACATATCTACATATACA\n+TATGTGGACAAAAGTGTTAAAATTAGTTAATAGTGCATGTTTTTTAAATG\n+AAAAAATAAAACAAAAAAAGCAAGCAAATATAAATTATATTCCACACACA\n+TTTACAATAATTTCAAACTACGCTAAAAATACCCAACAGAAAAAACATAG\n+AAAGGTAAACAAAATACAAGAATATCAAAAAAAATCTAATTTTCACGTTG\n+TTGTGGCCGGTACCACCGGTCATACATACATATAGATACATATTTAGATT\n+CTCACAAAAAGAATAGAAAATTACCGCTAATTTCCTTATATACGTTTTTG\n+TTTTCAGTGTTTTGAAATAGTATTTTTCAAACAAAAAACAATACAAAAGG\n+GTTAAAAAAATTGTTCGATTGGCTTTAACATACT
AATTAAAAAAAATCAC\n+AATTTTTTGGCAAAATATCGAAAAAAATTAAGTGTTTACATTTTTATGCG\n+AATTATTTGGAAATTTAAATAAGAACCCAACGCGGTTTGTCACTCTACAT\n+ATGGACAATTTGTGGCTTTGTTTTGAACAAAAAATGAATTTTTTTTTACT\n+CAAAATTAACAAGAAGAAACAGTAATGCGGGTTTTGATCGCAATTCTGCT\n+AAATCGAATTTCCGTGCCCAAAGGCTGACCTTTTTGTGCAGCAAATAGCC\n+CAAGCTAATAATCTACAGCAATTTCTTAAAGACCGAAAATGTATCAACTT\n+CGGCAAAATATGAAAATTTAACATTTATTTTTCAACATTTTTTGCCAAAT\n+AATCGACAAAAATTTCAGTGTCCCGATTTGGATGCGAATTAATTGAGGAC\n+AATTGTGACTTTGTTTTGTAAAAAAACTATTTTTTTTACTGAAAATTACA\n+ATCAAGAATCTAAGGCCAAATCAGAATATTTTCACTGCGGTTTTTTGATC\n+GCAATTCTGCTAAATCGGGTTCCGTGCCAAAAAGCTGACTATTTTTTTTA\n+CTGAGAATTAGCATCAAGAATTTATTGCCAAAACAGAACGCAATTCTGCT\n+AAATCGGGTTGCCCTGCCAAAAGGCTGGCCGTTTTGCGCATTCAATAACA\n+CAAGCTAACGGTTTACAGCACATTCGGAAAGATCCAAAAGAATTTCTATT\n+TCGGCATTTTTTATGTATTATCGTCTAATTTTGGTATGTGTATTCATTCT\n+TTTGGCTGTCCGTTATTAAGCTGCTTTTCTTATTCACATATTCAATGGCG\n+CAGTACATAAAAAGAATGAACCGTGGTTTTCTTGCCCTCTCCATCTCCCT\n+CTACCACTTCCCCTCTCAACAACTATCTCACGCACTCTGTCCCTTCTCGT\n+CTCTGTCACCCTCTCTTTACCTACCTCACTCTGGCACACTCTCTCTACCT\n+ATCTCACTCTGTCACCTTCTTCTACCTAATTCTCTCTGTCGCGCTCGCTC\n+TCTCAACATGCCTTTCGCACCGTATCTCTTCGTTTCTCTGTATGCTTGCT\n+CTCTTAATTTAAGTTTCTCTTTCTTTGTGTTCAATATCTTTCGCGCTCGT\n+GACTCCTATTGACCGGACTCTGAGCGGTGCTCATTAAGAGTGGAGTTTTC\n+GATGATGGCGGGTGGCGCGAACGAGAATTTTTTACATAAAGGTAAGTTGC\n+CAATTCTTTTTTAAATGTGACATACAGCATGCATATTTTATTTATTGTAA\n+TTATATGTGAAAGAATAAAATTATATGTACTTTTATCTAATCTATTGCAT\n+CTATTTTTTCACAGGTAAACGAGTCACACGAGAAGTAAAACACAAACACA\n+AGGATCAAAATAAAATTCAGGTGAGTGAACGAACGTGTTGAAAGTAGTTA\n+ATAGTGCATGTCTATTTAAGTGTAAGAAACAAAAAAGACACAAATGGGCC\n+AATAATTCGTTTCTTTAATAGATAACTGCCTATATTATGTTCAAACTATG\n+CTAAAAAGACCCAAAAGGAAAACATAAAGTATATACCTTGCAAAATGAAT\n+AAAAAGAAATCATATATATTCATATACGTTTGACCGGTACCACCGGTCAT\n+ACATACATAAACATAAACACAAAAAGACTAGAATATAAATACCATATTTT\n+TGATTGGCTTAAAAATGGCACAAAAAAAAGATAAAAACTAGAGTGTCTAG\n+CTTTAAGTGACAATCGACTCGGAAAATGATCAAGAAATAAATGAAAAATT\n+TGTCTTGCGGCTTTTTGGTCGCAATTCTAAATCGGTTTGTATGTATGTCA\n+CTCTTCATGTATACAATTTGGTGGCTTTGTATTCCACAAAAGTCACAGTA\n+GTCACAAAAGTTTTTTTTAC
TGAAAATTACCATCAAGAGTCTATGGCCAA\n+ATCAGAATATTTTCACTGCGGTTTTTTATTTCAATTCTGCTAAATCGGGT\n+TTCCGTGCCAAAAAGCTTTTGCAGCCAACAACAATATTCAATTTTTTGGC\n+AAAAAATCGACAAAAAAATT\n' |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/trfBig/hubaInputs/dbia3_trfBig_sorted.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/trfBig/hubaInputs/dbia3_trfBig_sorted.bed Wed Jul 13 13:36:37 2016 -0400 |
b |
b'@@ -0,0 +1,602 @@\n+contig1\t9130\t9428\ttrf\t163\t1.8\t164\t86\t2\t438\t36\t12\t13\t37\t1.82\tAAAAAAAATTATATCTTCGGTGTTTTTCAACATACAACCTCCTAAGCTTGGAAATAACATTTCTTAATCAGTTCTGAATTTCGAATTAAATTTTTATCAAAATCGGACAACTATACCATATAGCTGTCATAGGAAGGATTGGATAATTAGTGGTAAAATAATAT\n+contig1\t15707\t15757\ttrf\t20\t2.5\t19\t83\t9\t55\t32\t12\t2\t54\t1.49\tTATATCATTTATATCTCAG\n+contig1\t16261\t16302\ttrf\t12\t3.4\t12\t89\t0\t64\t2\t36\t24\t36\t1.69\tTCCGTCTGTCCG\n+contig1\t23387\t23483\ttrf\t45\t2.2\t45\t92\t1\t158\t19\t22\t30\t27\t1.98\tTCGGCGAAGAAATGTGCCACTTCGGCGGCACTTCTTGGAGTCACT\n+contig1\t23451\t23520\ttrf\t24\t2.9\t24\t80\t0\t75\t15\t26\t27\t30\t1.96\tTTCGGCGACACTTCTTGAAGTCAG\n+contig1\t24200\t24233\ttrf\t15\t2.2\t15\t94\t0\t57\t51\t12\t15\t21\t1.75\tAAACAATGGAATGCT\n+contig1\t29159\t29628\ttrf\t162\t2.9\t160\t80\t8\t518\t37\t12\t11\t38\t1.80\tGGAAAACATGAAATAAAAATTATATCTTTCGTGTTTTTTAACATATACCTTCTAAGCTTGAAAATAACATTTTTTATTTGTTCTGAATTTCGAATTAAATTTTATCAAAATCGGACGACTATATCATATAGCTGTCATAGGAACAATCGGAAAATAAGTA\n+contig1\t37571\t37606\ttrf\t8\t4.4\t8\t88\t0\t52\t31\t25\t42\t0\t1.55\tGACGGACA\n+contig1\t38436\t38491\ttrf\t12\t4.6\t12\t81\t0\t65\t0\t38\t27\t34\t1.57\tTGTCCGTCCGTC\n+contig1\t38436\t38491\ttrf\t20\t3.1\t18\t80\t17\t69\t0\t38\t27\t34\t1.57\tTGTCCGTCTGTCCGTCCG\n+contig1\t38436\t38491\ttrf\t8\t6.9\t8\t79\t8\t58\t0\t38\t27\t34\t1.57\tTGTCCGTC\n+contig1\t43116\t43168\ttrf\t2\t26.0\t2\t96\t0\t95\t48\t0\t1\t50\t1.12\tTA\n+contig10\t7159\t7189\ttrf\t13\t2.3\t13\t94\t0\t51\t36\t10\t16\t36\t1.82\tAATGATGCATTTA\n+contig10\t7363\t7540\ttrf\t101\t1.8\t95\t85\t7\t246\t44\t9\t10\t36\t1.70\tTTTTTATAAAATTGAATTCGAAATTCAGAACCAATTAAAAAATATTATTTATAAGAAGGTTATATGTTAAAAAACACAGTCGATATGATATAGTC\n+contig10\t8034\t8065\ttrf\t16\t1.9\t16\t93\t0\t53\t38\t25\t35\t0\t1.56\tACAGACAGACGGACGA\n+contig10\t16407\t16472\ttrf\t24\t2.7\t24\t88\t4\t87\t23\t23\t33\t20\t1.97\tGAAGTGTCGCCGAAGTGACTCCTG\n+contig10\t17443\t17508\ttrf\t24\t2.7\t24\t88\t4\t87\t23\t24\t33\t18\t1.96\tGAAGTGTCGCCGAAGTGACTCCTG\n+contig10\t18419\t1
8897\ttrf\t162\t3.0\t162\t78\t8\t408\t35\t15\t12\t36\t1.85\tCCCAAAGATAATTTTTCCATATTATTTTACCACTAATTTTCCGATCCTTCATATGGCAGCAATATGATATAGTCATCCGATTTCGATAAAAATTGAATTCAAAATTCAGAACTAATTAAAAATGGTTATATCCAAGCTTAGAAAGCTATATGTTAAAAATAA\n+contig10\t28438\t28868\ttrf\t163\t2.7\t160\t78\t7\t465\t38\t12\t13\t35\t1.82\tAACATTTTTTAATTAGTTCTGAATTTAAATTTAAATTTTATTAAAATCGGACGACTATATCATATAGCTGCCATAGGAACGATCGGAAAATTAGTGGAAAATAATAATAAAAAAATTATATCTTCGGTGTTTTTAACATATAACTCCAAAGCTTAAAAAT\n+contig10\t32277\t32320\ttrf\t2\t21.5\t2\t95\t0\t77\t48\t0\t0\t51\t1.00\tAT\n+contig10\t34149\t34186\ttrf\t19\t1.9\t19\t94\t0\t65\t32\t13\t37\t16\t1.87\tGCTGAGGATGAGATACAGA\n+contig11\t7433\t7863\ttrf\t163\t2.7\t160\t78\t7\t465\t38\t12\t13\t35\t1.82\tAACATTTTTTAATTAGTTCTGAATTTAAATTTAAATTTTATTAAAATCGGACGACTATATCATATAGCTGCCATAGGAACGATCGGAAAATTAGTGGAAAATAATAATAAAAAAATTATATCTTCGGTGTTTTTAACATATAACTCCAAAGCTTAAAAAT\n+contig11\t11272\t11315\ttrf\t2\t21.5\t2\t95\t0\t77\t48\t0\t0\t51\t1.00\tAT\n+contig11\t13144\t13181\ttrf\t19\t1.9\t19\t94\t0\t65\t32\t13\t37\t16\t1.87\tGCTGAGGATGAGATACAGA\n+contig11\t22738\t22776\ttrf\t20\t1.9\t20\t88\t0\t58\t34\t13\t7\t44\t1.72\tTATATATATAGAACCTGTTC\n+contig11\t25115\t25366\ttrf\t138\t1.8\t138\t95\t0\t457\t36\t13\t9\t40\t1.77\tCAAATTTTTTGTTTAAAACCGTTTTGGACTCTAAGGCTATGCAATGCATATAACGTTATAAAAAAAGTATTTACTTTTTTAACAAATTTATAACTTACCTATAACATATAACAAGAATACCTTTTGTTTACATTTTAC\n+contig11\t25870\t25921\ttrf\t18\t2.8\t18\t91\t5\t77\t25\t23\t0\t50\t1.49\tTCATCTATATCTTTCATA\n+contig11\t25872\t25939\ttrf\t23\t2.9\t23\t75\t6\t64\t23\t22\t1\t52\t1.56\tATCTTTCATTTCATATCATCTAT\n+contig11\t31165\t31562\ttrf\t159\t2.5\t155\t82\t6\t458\t38\t12\t13\t35\t1.82\tTTTTGAATTAAATTTTATCAAAATCGGACGACTATATCATATAGCTGCCATAGAACGATCGGAAAATTAGTGGAAAATATAAAAAAATTATATCTTCGGTGACTTTAACATATAACTTCCAATACTTGAAAATACAATTTTTAATTAGTTCTAAA\n+contig12\t2269\t2312\ttrf\t2\t21.5\t2\t95\t0\t77\t48\t0\t0\t51\t1.00\tAT\n+contig12\t4141\t4178\ttrf\t19\t1.9\t19\t94\t0\t65\t32\t13\t37\t16\t1.87\tGCTGAGGATGAGATACAGA\n+contig12\t13735\t13773\ttrf\t20
\t1.9\t20\t88\t0\t58\t34\t13\t7\t44\t1.72\tTATATATATAGAACCTGTTC\n+contig12\t16112\t16363\ttrf\t138\t1.8\t138\t95\t0\t457\t36\t13\t9\t40\t1.77\tCAAATTTTTTGTTTAAAACCGTTTTGGACTCTAAGGCTATGCAATGCATATAACGTTATAAAAAAAGTATTTACTTTTTTAACAAATTTATAACTTACCTATAACATATAACAAGAATACCTTTTGTTTACATTTTAC\n+contig12\t16867\t16918\ttrf\t18\t2.8\t18\t91\t5\t77\t25\t23\t0\t50\t1.49\tTCATCTATATCTTTCATA\n+contig12\t16869\t16936\ttrf\t23\t2.9\t23\t75\t6\t64\t23\t22\t1\t52\t1.56\tATCTTTCATTTCATATCATCTAT\n+contig12\t22162\t22559\ttrf\t159\t2.5\t155\t82\t6\t458\t38\t12\t13\t35\t1.82\tTTTTGAATTAAATTTTATCA'..b'AAAAATTATATCCTTGGTATTTTTTAACATATAACAATATAAGCTAGAAAAG\n+contig70\t17962\t18295\ttrf\t161\t2.1\t160\t83\t5\t413\t37\t12\t13\t35\t1.84\tAAAAAATTATATCTCTGGTGTTTTTAAACATATAACCTCCTAAACTTGGAAATAACATTTTATAATTAGTTCTGAATTTCGAATTAAATTTTATCAAAACCCGACGACTATATCATACATGTAACTGTAACGATCGGAAAATTGATGGGAAAATAATATG\n+contig70\t19030\t19306\ttrf\t137\t2.0\t137\t94\t1\t482\t39\t13\t13\t34\t1.83\tTTTAACACATACCTTTCTAAGCTTGGATATAACATTTTTAAACTGGTTCTGAATTTCAAATTAAATTCAATTAAAATCGGACGACTATATCATATAGCTCCCATAGGAAAAATCGGAAAATTAGTGAGAAAATAATA\n+contig70\t22269\t22308\ttrf\t19\t2.1\t19\t100\t0\t78\t58\t15\t15\t10\t1.62\tAACTAAGGAAATACCAGAA\n+contig70\t22812\t23252\ttrf\t162\t2.8\t156\t84\t5\t562\t37\t11\t13\t37\t1.81\tTTCTAAGCTTGAAATAACATTTTTTAATTAGTTCTGAATTTCGAATTTAATTTTATTAAAATCGGACGACTATATCATATAGCTGCCATAGGAACGATCGGAAAATTAGTGGAAAATAATAAATAAAAATTATATTTGTATTTTTAACATATAACC\n+contig70\t23421\t23486\ttrf\t13\t4.7\t13\t70\t20\t51\t38\t0\t0\t61\t0.96\tATATATATAATTT\n+contig70\t23426\t23479\ttrf\t7\t7.4\t7\t79\t12\t54\t35\t0\t0\t64\t0.94\tTATATTT\n+contig70\t23430\t23486\ttrf\t27\t2.0\t27\t83\t10\t69\t35\t0\t0\t64\t0.94\tATTTATATATTTATTAATATATTTTAT\n+contig70\t25576\t25606\ttrf\t12\t2.5\t12\t94\t0\t51\t33\t26\t36\t3\t1.73\tACGGACGGACAG\n+contig70\t27263\t27382\ttrf\t63\t1.9\t63\t85\t3\t168\t27\t25\t21\t26\t1.99\tATTTTTCCAAGCACTTTCCTGTACAAGGGAAACGTCCCAGGGAAAGCCTATCGGAATTTCAAA\n+contig70\t27353\t27406\ttrf\t25\t2.1\t25\t85\t0\t70\t30\t26\t3
2\t11\t1.91\tGGAAACGTCCCAGGGAAAGCCCATC\n+contig70\t27848\t27878\ttrf\t12\t2.5\t12\t88\t0\t51\t30\t30\t40\t0\t1.57\tACGGACGGACAG\n+contig70\t35504\t35713\ttrf\t102\t2.0\t102\t99\t0\t409\t37\t12\t13\t36\t1.82\tAAAACGATGGTAGACAAATATGCATATATTTTTTACACAAAACGAAATATAATGGACTTTTAAAAATTCTTTTTTCTATCTTTCCTGGTGGGAGATATATAT\n+contig70\t43333\t43446\ttrf\t57\t2.0\t57\t94\t0\t199\t30\t17\t22\t29\t1.97\tAATATTGGGAATAACATATTATCTTATAATATGGGAGCGCGAAGGCTCCTCGCCCAT\n+contig70\t44241\t44281\ttrf\t13\t2.9\t14\t85\t7\t55\t27\t22\t12\t37\t1.90\tCTACATGTTACATG\n+contig70\t44242\t44281\ttrf\t7\t5.7\t7\t81\t6\t53\t28\t20\t12\t38\t1.89\tTACATGT\n+contig70\t44485\t44531\ttrf\t14\t3.3\t14\t84\t0\t65\t41\t21\t10\t26\t1.86\tAACCGTATATGACT\n+contig70\t45147\t45181\ttrf\t16\t2.0\t17\t88\t11\t52\t52\t0\t2\t44\t1.16\tAAATATTAGTAATATAT\n+contig70\t46706\t46990\ttrf\t147\t1.9\t147\t100\t0\t568\t39\t10\t9\t40\t1.72\tTTCTTTATTTTTTTTATTTTAAAATACTTAGTACTTAGTAATGTCGCTAAAACCAATATAATATTCTTTAAAATTTAGAAAATATATTCAGACTTCGAATTACAAGGTGTATTAGAAGATAAATTAAACTCTAAAGCTTAATTTATC\n+contig70\t53900\t53978\ttrf\t28\t2.8\t28\t80\t5\t97\t15\t43\t5\t35\t1.69\tCTCTGTCACCCTCTCTTTACCTACCTCA\n+contig8\t2833\t2903\ttrf\t24\t2.9\t25\t83\t8\t92\t15\t27\t27\t30\t1.96\tCTTCGGCGACACTTCTTGGAAGTCA\n+contig8\t22744\t22922\ttrf\t88\t2.0\t88\t97\t0\t338\t28\t16\t24\t30\t1.96\tATGGGAGCTATAAGATATAGTTGTCCCATCCGGCAGGTTTCGACTTATATATTGCCTGCCATAGAAAGGAAACTTTTGGGAAAGTTTC\n+contig8\t29631\t30109\ttrf\t162\t3.0\t159\t81\t4\t573\t36\t12\t14\t36\t1.84\tAAAAATTATATCTTCGGTGTTTTTTAACATATAACCTTCTAAGCTTGGAAATAACAATTTTTAATTAGTTCTGAATTTCGAATTAAATTTTATCAAAATCGGACGACTATATCATATAGCTGCCATAGGAACAATCGGAAAATTAGTGGAAAATGAAAT\n+contig9\t2833\t2903\ttrf\t24\t2.9\t25\t83\t8\t92\t15\t27\t27\t30\t1.96\tCTTCGGCGACACTTCTTGGAAGTCA\n+contig9\t22744\t22922\ttrf\t88\t2.0\t88\t97\t0\t338\t28\t16\t24\t30\t1.96\tATGGGAGCTATAAGATATAGTTGTCCCATCCGGCAGGTTTCGACTTATATATTGCCTGCCATAGAAAGGAAACTTTTGGGAAAGTTTC\n+contig9\t29631\t30109\ttrf\t162\t3.0\t159\t81\t4\t573\t36\t12\t14\t36\t1.84\tAAAAATTAT
ATCTTCGGTGTTTTTTAACATATAACCTTCTAAGCTTGGAAATAACAATTTTTAATTAGTTCTGAATTTCGAATTAAATTTTATCAAAATCGGACGACTATATCATATAGCTGCCATAGGAACAATCGGAAAATTAGTGGAAAATGAAAT\n+contig9\t37064\t37094\ttrf\t12\t2.5\t12\t100\t0\t60\t0\t40\t26\t33\t1.57\tGTCCGTCTGTCC\n+contig9\t37275\t37670\ttrf\t162\t2.4\t160\t82\t10\t455\t37\t12\t13\t35\t1.83\tATAACAATTTTTATTTGTTTTGAATTTCGAATTAAATTTATCAAAATCGGACGACTATATCATATAGCTGCCAAGAGAAACAATCGGAAAATTAGTGGAAAAATAACATTGAAAAAGTATATCTTCGGTGTTTCTTAACATACAACCTCATAAGCTTGAA\n+contig9\t37280\t37752\ttrf\t161\t2.9\t161\t80\t6\t492\t37\t11\t15\t35\t1.84\tATTTTTTAATTAGTTCTGAATTTCGAATTAAATTTTATCAAAATCGGACGACTATATCATATAGCTGCAATAGGAACAATCGGAAAATTAGTGGGAAATACATGTGAAAAAATTATATCTTTGGTGTTTTTAACATATAACCTTATAAGCTTGGAAATACA\n+contig9\t56173\t56203\ttrf\t13\t2.3\t13\t94\t0\t51\t36\t10\t16\t36\t1.82\tAATGATGCATTTA\n+contig9\t56377\t56554\ttrf\t101\t1.8\t95\t85\t7\t246\t44\t9\t10\t36\t1.70\tTTTTTATAAAATTGAATTCGAAATTCAGAACCAATTAAAAAATATTATTTATAAGAAGGTTATATGTTAAAAAACACAGTCGATATGATATAGTC\n+contig9\t57048\t57079\ttrf\t16\t1.9\t16\t93\t0\t53\t38\t25\t35\t0\t1.56\tACAGACAGACGGACGA\n' |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/trfBig/output/dbia3_trfBig.bb |
b |
Binary file test-data/trfBig/output/dbia3_trfBig.bb has changed |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/trfBig/workflowInputs/dbia3.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/trfBig/workflowInputs/dbia3.fa Wed Jul 13 13:36:37 2016 -0400 |
b |
b'@@ -0,0 +1,66784 @@\n+>contig1\n+AAAACTAATTTTATCAAAATCGGACAACTATATCATATAGCTGCCATACG\n+AACGATCGGAAAATTGGTAAGTAAATAATTAAAAATATTATATCTTTGGT\n+GTGTTTTTGGAAATAACATTTTTTAATATGTTTTGAATTTTGAATTAAAT\n+TCTATCAAAATCGGACGACTATATCATATAGCTGCCATACAAACGATCGG\n+AAAATTGGTGAATAAATAATATGAAAAAATTATATCTTTGGTTATTTTTT\n+AGACAAATAACCTCCAACGCTTGGAAATAACATTTTTTAATTAGTTATAA\n+ATTTGATATTTTAATTTTATCAAAATCGGACGAATATAGCATATAGCTGT\n+TAAAATAATATGAAACAAATTATAGCTCCGGTGTTTTTTACATATTATCT\n+TATACTATTGGGAAAATAGTTTATTATATTTTAAAGAATTTCCAATTAAA\n+CTCTAACATATAGCTTTCAAAGAAACGGTCAAAAAAGTAAAGAAATCATT\n+TTTTTTTAACATCACTGAAGCTAGAAACAATCCTTAAAAATGTAACATGG\n+TGTTAGTAGCATTGAAAATTGCTTATAACTGCAAAGGGAAAACAAACATC\n+GGCTTGCCGAATGTAATTTCCATTCTTGTTTGACTTGAGTTTATAACTTA\n+CAATTATGGTAAGGTGCCTGATTTTGGTTTTTGCCATACGGTATGTGGTA\n+GTTATCATTTTGCCGTGCTATAAGTGACATCACAGCCGATTCTTTTACTC\n+CATAATGGGCCAATGTGTTGAGGCGTTTCCATCCATTTATTGTTTTTGTA\n+GTTAGGTCTTCATCTTGTAATGTGAGATGACCTCCTCTTCCATGCCTCCA\n+TTCTAAGTCAACTTCGTGCACAGATGGTCTCATAGAAAACGGAGTATTTT\n+TAAAAATTGCGTCCAAAATTTTTAATTTAACTTGCGATATGGTATCCAAA\n+TCGTTTACACGACATTGGACTTTTTCATCAAGATCATCTTGTAAAATGTG\n+CAAAATTACAACAGAATGGGTAACTTGTTCGTGGAGAAGTCGTTCCTCTG\n+AAAGTGAATAACGAGCATCGTGGGTTATTGCGTCCACCAAACCCTTTTCA\n+ATTTGATGCTTAATTGCCTTGAACAGCAAAAATAAATTAGATCCAGCATA\n+TTCTTTTAGGTAGTCGTACATACAAATTGCTAAGTAGTTTGTTAACATTT\n+TTTCAACTACGCTCTCAGTGCGTCGTAGCATTAGCTGAGGATGCTTGCTG\n+GCGAGCGATTTGTCAATTAATCGCAATAAAAGGGACTTTAAAATTTCCGT\n+CGCATATTCCATTTTGTTCATTAGAACAACCATAAGTAAAGAGGCGACGT\n+TAACTCGATCGCGAATTGAGAAAGATGACCGTTGAGCTTCTAAAGTTTCT\n+ATGAACAATAGTAAAAAATATTTGTTTCCAATAAGTTGCTCGAATTGTAT\n+CATAGCTGCATCATAGTTAGTGTGCGGGCTACTTCCACAAAATTTTCGGG\n+AGTTTAGAATAGGATGATCTGATACACCGGGAAAGAAAACTTTCATAATG\n+TAATTGACGTGATCTAACGTTGGTATACCGGTGCTCTCCAAATCTGCTGT\n+TAGATCGGTCATGTCCGTTTGGAGCTCAGCAAATGCCTGTTTACACTCAG\n+AACGAACGTTGCTTTCCAATGTTATCATCTGTATCTGAATTCGTTTGTAT\n+TCCCGTTCTGCTTGGGTCGATTTCCTTCTAAATATTATTAGTACGACAAC\n+CAAAACGATGACAAGCACTGCAACTGTCAATATAACGACAAACATGGCAT\n+GTGAAAAAACATAGGGTTTATTTAAATCATATTTCAAATATCCTATGGCG\n+AAAC
GAAGATTTCGCCCTACTTTAACCACAACTAGAGGTAAATCTGTCGA\n+TTGATCCACACCATTTTCATCAGTTGGGAGTGGTTGATGTTCCGGTGGAA\n+TGCACAAAAGTTGAGTTAGTGTAAGGCTTGTTATATTGCATTGAGTAGTA\n+CCAATGGTTACATTAACGTCGTATTCATCAGCTGCCAAATTTAGTAGCTC\n+GCCTTCAATAACCAAGCTGTCACCCTTGTATAGTTTAATTCCGTCATTTG\n+GAAATGGTAAATATTTTGGATCGGCTAAATAAACAATAGTGCTCCTGATA\n+TCATGAAAGTATTTATTTAGATCTCGAACAAGCTGTACATTGTCCATTAC\n+AAAACTAAGTTGTAAATTCAGTTGCGTTTCATGGACCTTTACAAAAGTTG\n+TAACATCCATATTATTATTGACGAAATAGCTAGGCGCAGTTCCCCCTGTT\n+GTGTATATATGAAAGTTGTCTACGAAGTCTGCTTTTCGCTTTCGTCTAGA\n+ATATTCGTTCTGATGCGCACTTTCATTTTTAAAGTCAAATTCCGAATCCA\n+TCACTCTGTTAGAACTTTTAAATGCTTGAAATTTATCATTTACCGGAGGA\n+GATGGGCATTCCATTTGATTTGAATTGATTACTACGCAAGATGTTTTGTT\n+TACTCTTTCATTATCAAAAAAAACTTCAAGTTCAGGCTTTTGAATTGAAT\n+TAAGATACATTCCATGAACGGTCAGAACGCGACCACCACTTACAAAACTG\n+CGTAAAGGCTTAATCTGCATTATACGCGGGTCTTGGGTATAGTTAAAAAT\n+AGAACAGGGCTGCCTTGGTAACGTACGAAATTGGTATGAACCGAAATTAC\n+TTCGAGTTAGATTTTTATTGGGTATAGTTGCGGGTGAGATATGACATTCT\n+AAAGTTCGGTTGGCACCGTCAATAAGAAGGTGTAGAGACCGTATTGGTTC\n+TGGCTGGGTCGCCTCTGATGTAGTACTACTAACTTGAGTTGAAGAAGCTT\n+GTGTAACGTTTATATGACACTCGTACTCATCTAAATATGCTCGCATGGTT\n+GATCCAATGTTCAAAAACTTTCCTATTAACGACAATTGTGTTCCTCCCGA\n+CCTGGGCCCAATCGTGGGATATAATCCTGTTAGCAAAACATTCTTAAAGT\n+GAAATTGTACACTAGATTCAGTATACCCTGCATCATTTGCAACCTTTATA\n+GGCGCTGACATTTCATACATCACTGCTCCAGTGCGGCATTCGATCTTTAC\n+AGAAATTTGGTAGTTGACTAGTTCACAAGGCACAGACCCAATAAATATTT\n+TTCCACGCACATCTCCTTCGCGAATGCCCAAATTACTGCCTTCAATTGTA\n+ATAAGGGTACCACCCTCAATAGGTCCAGATAACGGTTTAATAATATCGAT\n+CCTTGGTAAAGGGCACTCGTTTTCTTTAGTTGTTTTTGATCCTGCACTAA\n+TAGCACGATCACTGTCTGCTATGCAAGTCTCGTTATATACACATGAGTTG\n+CTGCACCAAGCACATTGGTATTTTGGATCACGAGTGACACACAAACTGCA\n+ATCAGCATGATCCCGATGTGAGCCCAGTACGTCGCATTTATATAATGTAA\n+CAATCGCCGTGTCCACATAGTGCTGGAAATTCCATGTAATTACAACCTTT\n+GCTTGGTATTCATGTGTATTAGTCTCGTAGAAATAAGGTGTTTTTTCACA\n+AACAACAATCTTGTTTGACTCAATGTGGGCAGGCAATAGCATCTGAGCAG\n+CTTCAATATGAACTGTACACAAGAATCCAGCGTGAGCGCTTTTGGGTTTT\n+GGTAAGTTTTCTATCTCTAAACGAATTTCTTTTGGCACCCGCACCGGTAA\n+AAGAATCGCCGGACGATTGCTTTTTAAATGGGGGCATTGACCA
ACACTAG\n+TTACCGAATTTTCTATATTACGACATTGTATTGATTGATGGACACATTTA\n+TTGTCAAAAATACACC'..b'CCACTGGACCCGTTGGATAGCGTC\n+GGCACTTTTCTGTTTGGTGCCATTCACTGGTCACTAGTAGGACCGCTCTG\n+CCTATTAAAATAGGAGTGCTGCTTAGAATTTATTTCAGCAAGACAGCCGT\n+AAGATATATATGAATAGTATTTAACGGCAGTGGCAGAGGCCTTTACAATA\n+TAAGGCTTTACGGGATTTTAAACAATGAAAAACAAAAATATTTTTATAGA\n+GTCGAGATAAAATACTTTTTACAATAAATACTAAGTATTTTTAAAAGATA\n+AAAATATTTAAAAGTAAATACTGAATATTTTAACAGGTGTTAAGTACTGG\n+GAACTGTGACTAAGGACGGCGGTATATCGCTACCATAATATTTTTAAAGG\n+AACTGAATACTGAAAGATGTGACCAAAATGGTAGAATATCGCTAACGAAA\n+ATCACAGACGCAAATATCGATAGTGGCCTAGAGACCCGATATCGCCGCCG\n+TGATTATCGATGAAACTACAGCTGAGCTGCTTGTTTACATTCTAAAGTTA\n+AAATTTTTAAAAATTTCCAATACAAAAAAAAATTCTTCGTATTAACACGG\n+CTGTTTGCCGGTAAGATCTGAAGTGGATTCAAGTTGCAGCACCCTCAGGA\n+AAAGAATAAATTATTATAATATAATAATATAATATAAAGAATTTAAATTC\n+GAACCAAACGCGGTATGTCACTCTTCATTTAGAGAATTTGTGGCTTTGTT\n+TTGTACAAAAAACTATTCTTTTTTCTTGAAAATTAGCATCAAGAATTTAT\n+TGCCAAAACAGAACGCAATTCTGCTAAATCTGGTTACCCTGCCAAATGGC\n+TGGCCGTTCTGTGCATTCAATAACACAAGCTAACGGCTTACGGCACATTC\n+GGAAAGATCCAAAAGAATTTCTATTCCGGCATTTTTTATGTATTATCGTC\n+CAATTTTTGTATGTGTATTCATTCTTTTGGCTGTCCGTTATTAAGCTTTT\n+CTTATCCACATAAGCTGACCAGCTGCCAAAAAGCCGACCGTTTTGTGCAG\n+CCAATAACCCAAGCTAACTATCAACAGCAATTTCGAAAAATCCAGAAGAA\n+GCTATCCTTCGGCAATTTTTTCTTATATTATAATTTTTTTGTCACAATTT\n+TTTGTCAAAAAATCGACAATAATTAAGTGTTTACATTTTAATGCGATTTA\n+ATTGGAAATTTGAATTCGAATCAAACGCGGTGTCACTCTACATATGGACA\n+ATTTGTGGCTACAAAATTAGTTGAAAGTGCATGTTTTTTTTTAAGTGAAA\n+TAAAACAAAAACAGCAATAACAGGCCAAAATGTTGTTTTATAAATAAATA\n+CTTACATATCCGCATATAAATATGAATGAAAAAAAAAACAGCAAGACTGG\n+GCCAAAATATTGTTCTTTAATATGTAAATAAATACATATCTACATATACA\n+TATGTGGACAAAAGTGTTAAAATTAGTTAATAGTGCATGTTTTTTAAATG\n+AAAAAATAAAACAAAAAAAGCAAGCAAATATAAATTATATTCCACACACA\n+TTTACAATAATTTCAAACTACGCTAAAAATACCCAACAGAAAAAACATAG\n+AAAGGTAAACAAAATACAAGAATATCAAAAAAAATCTAATTTTCACGTTG\n+TTGTGGCCGGTACCACCGGTCATACATACATATAGATACATATTTAGATT\n+CTCACAAAAAGAATAGAAAATTACCGCTAATTTCCTTATATACGTTTTTG\n+TTTTCAGTGTTTTGAAATAGTATTTTTCAAACAAAAAACAATACAAAAGG\n+GTTAAAAAAATTGTTCGATTGGCTTTAACATACT
AATTAAAAAAAATCAC\n+AATTTTTTGGCAAAATATCGAAAAAAATTAAGTGTTTACATTTTTATGCG\n+AATTATTTGGAAATTTAAATAAGAACCCAACGCGGTTTGTCACTCTACAT\n+ATGGACAATTTGTGGCTTTGTTTTGAACAAAAAATGAATTTTTTTTTACT\n+CAAAATTAACAAGAAGAAACAGTAATGCGGGTTTTGATCGCAATTCTGCT\n+AAATCGAATTTCCGTGCCCAAAGGCTGACCTTTTTGTGCAGCAAATAGCC\n+CAAGCTAATAATCTACAGCAATTTCTTAAAGACCGAAAATGTATCAACTT\n+CGGCAAAATATGAAAATTTAACATTTATTTTTCAACATTTTTTGCCAAAT\n+AATCGACAAAAATTTCAGTGTCCCGATTTGGATGCGAATTAATTGAGGAC\n+AATTGTGACTTTGTTTTGTAAAAAAACTATTTTTTTTACTGAAAATTACA\n+ATCAAGAATCTAAGGCCAAATCAGAATATTTTCACTGCGGTTTTTTGATC\n+GCAATTCTGCTAAATCGGGTTCCGTGCCAAAAAGCTGACTATTTTTTTTA\n+CTGAGAATTAGCATCAAGAATTTATTGCCAAAACAGAACGCAATTCTGCT\n+AAATCGGGTTGCCCTGCCAAAAGGCTGGCCGTTTTGCGCATTCAATAACA\n+CAAGCTAACGGTTTACAGCACATTCGGAAAGATCCAAAAGAATTTCTATT\n+TCGGCATTTTTTATGTATTATCGTCTAATTTTGGTATGTGTATTCATTCT\n+TTTGGCTGTCCGTTATTAAGCTGCTTTTCTTATTCACATATTCAATGGCG\n+CAGTACATAAAAAGAATGAACCGTGGTTTTCTTGCCCTCTCCATCTCCCT\n+CTACCACTTCCCCTCTCAACAACTATCTCACGCACTCTGTCCCTTCTCGT\n+CTCTGTCACCCTCTCTTTACCTACCTCACTCTGGCACACTCTCTCTACCT\n+ATCTCACTCTGTCACCTTCTTCTACCTAATTCTCTCTGTCGCGCTCGCTC\n+TCTCAACATGCCTTTCGCACCGTATCTCTTCGTTTCTCTGTATGCTTGCT\n+CTCTTAATTTAAGTTTCTCTTTCTTTGTGTTCAATATCTTTCGCGCTCGT\n+GACTCCTATTGACCGGACTCTGAGCGGTGCTCATTAAGAGTGGAGTTTTC\n+GATGATGGCGGGTGGCGCGAACGAGAATTTTTTACATAAAGGTAAGTTGC\n+CAATTCTTTTTTAAATGTGACATACAGCATGCATATTTTATTTATTGTAA\n+TTATATGTGAAAGAATAAAATTATATGTACTTTTATCTAATCTATTGCAT\n+CTATTTTTTCACAGGTAAACGAGTCACACGAGAAGTAAAACACAAACACA\n+AGGATCAAAATAAAATTCAGGTGAGTGAACGAACGTGTTGAAAGTAGTTA\n+ATAGTGCATGTCTATTTAAGTGTAAGAAACAAAAAAGACACAAATGGGCC\n+AATAATTCGTTTCTTTAATAGATAACTGCCTATATTATGTTCAAACTATG\n+CTAAAAAGACCCAAAAGGAAAACATAAAGTATATACCTTGCAAAATGAAT\n+AAAAAGAAATCATATATATTCATATACGTTTGACCGGTACCACCGGTCAT\n+ACATACATAAACATAAACACAAAAAGACTAGAATATAAATACCATATTTT\n+TGATTGGCTTAAAAATGGCACAAAAAAAAGATAAAAACTAGAGTGTCTAG\n+CTTTAAGTGACAATCGACTCGGAAAATGATCAAGAAATAAATGAAAAATT\n+TGTCTTGCGGCTTTTTGGTCGCAATTCTAAATCGGTTTGTATGTATGTCA\n+CTCTTCATGTATACAATTTGGTGGCTTTGTATTCCACAAAAGTCACAGTA\n+GTCACAAAAGTTTTTTTTAC
TGAAAATTACCATCAAGAGTCTATGGCCAA\n+ATCAGAATATTTTCACTGCGGTTTTTTATTTCAATTCTGCTAAATCGGGT\n+TTCCGTGCCAAAAAGCTTTTGCAGCCAACAACAATATTCAATTTTTTGGC\n+AAAAAATCGACAAAAAAATT\n' |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/trfBig/workflowInputs/dbia3.fa.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/trfBig/workflowInputs/dbia3.fa.txt Wed Jul 13 13:36:37 2016 -0400 |
b |
b'@@ -0,0 +1,66784 @@\n+>contig1\n+AAAACTAATTTTATCAAAATCGGACAACTATATCATATAGCTGCCATACG\n+AACGATCGGAAAATTGGTAAGTAAATAATTAAAAATATTATATCTTTGGT\n+GTGTTTTTGGAAATAACATTTTTTAATATGTTTTGAATTTTGAATTAAAT\n+TCTATCAAAATCGGACGACTATATCATATAGCTGCCATACAAACGATCGG\n+AAAATTGGTGAATAAATAATATGAAAAAATTATATCTTTGGTTATTTTTT\n+AGACAAATAACCTCCAACGCTTGGAAATAACATTTTTTAATTAGTTATAA\n+ATTTGATATTTTAATTTTATCAAAATCGGACGAATATAGCATATAGCTGT\n+TAAAATAATATGAAACAAATTATAGCTCCGGTGTTTTTTACATATTATCT\n+TATACTATTGGGAAAATAGTTTATTATATTTTAAAGAATTTCCAATTAAA\n+CTCTAACATATAGCTTTCAAAGAAACGGTCAAAAAAGTAAAGAAATCATT\n+TTTTTTTAACATCACTGAAGCTAGAAACAATCCTTAAAAATGTAACATGG\n+TGTTAGTAGCATTGAAAATTGCTTATAACTGCAAAGGGAAAACAAACATC\n+GGCTTGCCGAATGTAATTTCCATTCTTGTTTGACTTGAGTTTATAACTTA\n+CAATTATGGTAAGGTGCCTGATTTTGGTTTTTGCCATACGGTATGTGGTA\n+GTTATCATTTTGCCGTGCTATAAGTGACATCACAGCCGATTCTTTTACTC\n+CATAATGGGCCAATGTGTTGAGGCGTTTCCATCCATTTATTGTTTTTGTA\n+GTTAGGTCTTCATCTTGTAATGTGAGATGACCTCCTCTTCCATGCCTCCA\n+TTCTAAGTCAACTTCGTGCACAGATGGTCTCATAGAAAACGGAGTATTTT\n+TAAAAATTGCGTCCAAAATTTTTAATTTAACTTGCGATATGGTATCCAAA\n+TCGTTTACACGACATTGGACTTTTTCATCAAGATCATCTTGTAAAATGTG\n+CAAAATTACAACAGAATGGGTAACTTGTTCGTGGAGAAGTCGTTCCTCTG\n+AAAGTGAATAACGAGCATCGTGGGTTATTGCGTCCACCAAACCCTTTTCA\n+ATTTGATGCTTAATTGCCTTGAACAGCAAAAATAAATTAGATCCAGCATA\n+TTCTTTTAGGTAGTCGTACATACAAATTGCTAAGTAGTTTGTTAACATTT\n+TTTCAACTACGCTCTCAGTGCGTCGTAGCATTAGCTGAGGATGCTTGCTG\n+GCGAGCGATTTGTCAATTAATCGCAATAAAAGGGACTTTAAAATTTCCGT\n+CGCATATTCCATTTTGTTCATTAGAACAACCATAAGTAAAGAGGCGACGT\n+TAACTCGATCGCGAATTGAGAAAGATGACCGTTGAGCTTCTAAAGTTTCT\n+ATGAACAATAGTAAAAAATATTTGTTTCCAATAAGTTGCTCGAATTGTAT\n+CATAGCTGCATCATAGTTAGTGTGCGGGCTACTTCCACAAAATTTTCGGG\n+AGTTTAGAATAGGATGATCTGATACACCGGGAAAGAAAACTTTCATAATG\n+TAATTGACGTGATCTAACGTTGGTATACCGGTGCTCTCCAAATCTGCTGT\n+TAGATCGGTCATGTCCGTTTGGAGCTCAGCAAATGCCTGTTTACACTCAG\n+AACGAACGTTGCTTTCCAATGTTATCATCTGTATCTGAATTCGTTTGTAT\n+TCCCGTTCTGCTTGGGTCGATTTCCTTCTAAATATTATTAGTACGACAAC\n+CAAAACGATGACAAGCACTGCAACTGTCAATATAACGACAAACATGGCAT\n+GTGAAAAAACATAGGGTTTATTTAAATCATATTTCAAATATCCTATGGCG\n+AAAC
GAAGATTTCGCCCTACTTTAACCACAACTAGAGGTAAATCTGTCGA\n+TTGATCCACACCATTTTCATCAGTTGGGAGTGGTTGATGTTCCGGTGGAA\n+TGCACAAAAGTTGAGTTAGTGTAAGGCTTGTTATATTGCATTGAGTAGTA\n+CCAATGGTTACATTAACGTCGTATTCATCAGCTGCCAAATTTAGTAGCTC\n+GCCTTCAATAACCAAGCTGTCACCCTTGTATAGTTTAATTCCGTCATTTG\n+GAAATGGTAAATATTTTGGATCGGCTAAATAAACAATAGTGCTCCTGATA\n+TCATGAAAGTATTTATTTAGATCTCGAACAAGCTGTACATTGTCCATTAC\n+AAAACTAAGTTGTAAATTCAGTTGCGTTTCATGGACCTTTACAAAAGTTG\n+TAACATCCATATTATTATTGACGAAATAGCTAGGCGCAGTTCCCCCTGTT\n+GTGTATATATGAAAGTTGTCTACGAAGTCTGCTTTTCGCTTTCGTCTAGA\n+ATATTCGTTCTGATGCGCACTTTCATTTTTAAAGTCAAATTCCGAATCCA\n+TCACTCTGTTAGAACTTTTAAATGCTTGAAATTTATCATTTACCGGAGGA\n+GATGGGCATTCCATTTGATTTGAATTGATTACTACGCAAGATGTTTTGTT\n+TACTCTTTCATTATCAAAAAAAACTTCAAGTTCAGGCTTTTGAATTGAAT\n+TAAGATACATTCCATGAACGGTCAGAACGCGACCACCACTTACAAAACTG\n+CGTAAAGGCTTAATCTGCATTATACGCGGGTCTTGGGTATAGTTAAAAAT\n+AGAACAGGGCTGCCTTGGTAACGTACGAAATTGGTATGAACCGAAATTAC\n+TTCGAGTTAGATTTTTATTGGGTATAGTTGCGGGTGAGATATGACATTCT\n+AAAGTTCGGTTGGCACCGTCAATAAGAAGGTGTAGAGACCGTATTGGTTC\n+TGGCTGGGTCGCCTCTGATGTAGTACTACTAACTTGAGTTGAAGAAGCTT\n+GTGTAACGTTTATATGACACTCGTACTCATCTAAATATGCTCGCATGGTT\n+GATCCAATGTTCAAAAACTTTCCTATTAACGACAATTGTGTTCCTCCCGA\n+CCTGGGCCCAATCGTGGGATATAATCCTGTTAGCAAAACATTCTTAAAGT\n+GAAATTGTACACTAGATTCAGTATACCCTGCATCATTTGCAACCTTTATA\n+GGCGCTGACATTTCATACATCACTGCTCCAGTGCGGCATTCGATCTTTAC\n+AGAAATTTGGTAGTTGACTAGTTCACAAGGCACAGACCCAATAAATATTT\n+TTCCACGCACATCTCCTTCGCGAATGCCCAAATTACTGCCTTCAATTGTA\n+ATAAGGGTACCACCCTCAATAGGTCCAGATAACGGTTTAATAATATCGAT\n+CCTTGGTAAAGGGCACTCGTTTTCTTTAGTTGTTTTTGATCCTGCACTAA\n+TAGCACGATCACTGTCTGCTATGCAAGTCTCGTTATATACACATGAGTTG\n+CTGCACCAAGCACATTGGTATTTTGGATCACGAGTGACACACAAACTGCA\n+ATCAGCATGATCCCGATGTGAGCCCAGTACGTCGCATTTATATAATGTAA\n+CAATCGCCGTGTCCACATAGTGCTGGAAATTCCATGTAATTACAACCTTT\n+GCTTGGTATTCATGTGTATTAGTCTCGTAGAAATAAGGTGTTTTTTCACA\n+AACAACAATCTTGTTTGACTCAATGTGGGCAGGCAATAGCATCTGAGCAG\n+CTTCAATATGAACTGTACACAAGAATCCAGCGTGAGCGCTTTTGGGTTTT\n+GGTAAGTTTTCTATCTCTAAACGAATTTCTTTTGGCACCCGCACCGGTAA\n+AAGAATCGCCGGACGATTGCTTTTTAAATGGGGGCATTGACCA
ACACTAG\n+TTACCGAATTTTCTATATTACGACATTGTATTGATTGATGGACACATTTA\n+TTGTCAAAAATACACC'..b'CCACTGGACCCGTTGGATAGCGTC\n+GGCACTTTTCTGTTTGGTGCCATTCACTGGTCACTAGTAGGACCGCTCTG\n+CCTATTAAAATAGGAGTGCTGCTTAGAATTTATTTCAGCAAGACAGCCGT\n+AAGATATATATGAATAGTATTTAACGGCAGTGGCAGAGGCCTTTACAATA\n+TAAGGCTTTACGGGATTTTAAACAATGAAAAACAAAAATATTTTTATAGA\n+GTCGAGATAAAATACTTTTTACAATAAATACTAAGTATTTTTAAAAGATA\n+AAAATATTTAAAAGTAAATACTGAATATTTTAACAGGTGTTAAGTACTGG\n+GAACTGTGACTAAGGACGGCGGTATATCGCTACCATAATATTTTTAAAGG\n+AACTGAATACTGAAAGATGTGACCAAAATGGTAGAATATCGCTAACGAAA\n+ATCACAGACGCAAATATCGATAGTGGCCTAGAGACCCGATATCGCCGCCG\n+TGATTATCGATGAAACTACAGCTGAGCTGCTTGTTTACATTCTAAAGTTA\n+AAATTTTTAAAAATTTCCAATACAAAAAAAAATTCTTCGTATTAACACGG\n+CTGTTTGCCGGTAAGATCTGAAGTGGATTCAAGTTGCAGCACCCTCAGGA\n+AAAGAATAAATTATTATAATATAATAATATAATATAAAGAATTTAAATTC\n+GAACCAAACGCGGTATGTCACTCTTCATTTAGAGAATTTGTGGCTTTGTT\n+TTGTACAAAAAACTATTCTTTTTTCTTGAAAATTAGCATCAAGAATTTAT\n+TGCCAAAACAGAACGCAATTCTGCTAAATCTGGTTACCCTGCCAAATGGC\n+TGGCCGTTCTGTGCATTCAATAACACAAGCTAACGGCTTACGGCACATTC\n+GGAAAGATCCAAAAGAATTTCTATTCCGGCATTTTTTATGTATTATCGTC\n+CAATTTTTGTATGTGTATTCATTCTTTTGGCTGTCCGTTATTAAGCTTTT\n+CTTATCCACATAAGCTGACCAGCTGCCAAAAAGCCGACCGTTTTGTGCAG\n+CCAATAACCCAAGCTAACTATCAACAGCAATTTCGAAAAATCCAGAAGAA\n+GCTATCCTTCGGCAATTTTTTCTTATATTATAATTTTTTTGTCACAATTT\n+TTTGTCAAAAAATCGACAATAATTAAGTGTTTACATTTTAATGCGATTTA\n+ATTGGAAATTTGAATTCGAATCAAACGCGGTGTCACTCTACATATGGACA\n+ATTTGTGGCTACAAAATTAGTTGAAAGTGCATGTTTTTTTTTAAGTGAAA\n+TAAAACAAAAACAGCAATAACAGGCCAAAATGTTGTTTTATAAATAAATA\n+CTTACATATCCGCATATAAATATGAATGAAAAAAAAAACAGCAAGACTGG\n+GCCAAAATATTGTTCTTTAATATGTAAATAAATACATATCTACATATACA\n+TATGTGGACAAAAGTGTTAAAATTAGTTAATAGTGCATGTTTTTTAAATG\n+AAAAAATAAAACAAAAAAAGCAAGCAAATATAAATTATATTCCACACACA\n+TTTACAATAATTTCAAACTACGCTAAAAATACCCAACAGAAAAAACATAG\n+AAAGGTAAACAAAATACAAGAATATCAAAAAAAATCTAATTTTCACGTTG\n+TTGTGGCCGGTACCACCGGTCATACATACATATAGATACATATTTAGATT\n+CTCACAAAAAGAATAGAAAATTACCGCTAATTTCCTTATATACGTTTTTG\n+TTTTCAGTGTTTTGAAATAGTATTTTTCAAACAAAAAACAATACAAAAGG\n+GTTAAAAAAATTGTTCGATTGGCTTTAACATACT
AATTAAAAAAAATCAC\n+AATTTTTTGGCAAAATATCGAAAAAAATTAAGTGTTTACATTTTTATGCG\n+AATTATTTGGAAATTTAAATAAGAACCCAACGCGGTTTGTCACTCTACAT\n+ATGGACAATTTGTGGCTTTGTTTTGAACAAAAAATGAATTTTTTTTTACT\n+CAAAATTAACAAGAAGAAACAGTAATGCGGGTTTTGATCGCAATTCTGCT\n+AAATCGAATTTCCGTGCCCAAAGGCTGACCTTTTTGTGCAGCAAATAGCC\n+CAAGCTAATAATCTACAGCAATTTCTTAAAGACCGAAAATGTATCAACTT\n+CGGCAAAATATGAAAATTTAACATTTATTTTTCAACATTTTTTGCCAAAT\n+AATCGACAAAAATTTCAGTGTCCCGATTTGGATGCGAATTAATTGAGGAC\n+AATTGTGACTTTGTTTTGTAAAAAAACTATTTTTTTTACTGAAAATTACA\n+ATCAAGAATCTAAGGCCAAATCAGAATATTTTCACTGCGGTTTTTTGATC\n+GCAATTCTGCTAAATCGGGTTCCGTGCCAAAAAGCTGACTATTTTTTTTA\n+CTGAGAATTAGCATCAAGAATTTATTGCCAAAACAGAACGCAATTCTGCT\n+AAATCGGGTTGCCCTGCCAAAAGGCTGGCCGTTTTGCGCATTCAATAACA\n+CAAGCTAACGGTTTACAGCACATTCGGAAAGATCCAAAAGAATTTCTATT\n+TCGGCATTTTTTATGTATTATCGTCTAATTTTGGTATGTGTATTCATTCT\n+TTTGGCTGTCCGTTATTAAGCTGCTTTTCTTATTCACATATTCAATGGCG\n+CAGTACATAAAAAGAATGAACCGTGGTTTTCTTGCCCTCTCCATCTCCCT\n+CTACCACTTCCCCTCTCAACAACTATCTCACGCACTCTGTCCCTTCTCGT\n+CTCTGTCACCCTCTCTTTACCTACCTCACTCTGGCACACTCTCTCTACCT\n+ATCTCACTCTGTCACCTTCTTCTACCTAATTCTCTCTGTCGCGCTCGCTC\n+TCTCAACATGCCTTTCGCACCGTATCTCTTCGTTTCTCTGTATGCTTGCT\n+CTCTTAATTTAAGTTTCTCTTTCTTTGTGTTCAATATCTTTCGCGCTCGT\n+GACTCCTATTGACCGGACTCTGAGCGGTGCTCATTAAGAGTGGAGTTTTC\n+GATGATGGCGGGTGGCGCGAACGAGAATTTTTTACATAAAGGTAAGTTGC\n+CAATTCTTTTTTAAATGTGACATACAGCATGCATATTTTATTTATTGTAA\n+TTATATGTGAAAGAATAAAATTATATGTACTTTTATCTAATCTATTGCAT\n+CTATTTTTTCACAGGTAAACGAGTCACACGAGAAGTAAAACACAAACACA\n+AGGATCAAAATAAAATTCAGGTGAGTGAACGAACGTGTTGAAAGTAGTTA\n+ATAGTGCATGTCTATTTAAGTGTAAGAAACAAAAAAGACACAAATGGGCC\n+AATAATTCGTTTCTTTAATAGATAACTGCCTATATTATGTTCAAACTATG\n+CTAAAAAGACCCAAAAGGAAAACATAAAGTATATACCTTGCAAAATGAAT\n+AAAAAGAAATCATATATATTCATATACGTTTGACCGGTACCACCGGTCAT\n+ACATACATAAACATAAACACAAAAAGACTAGAATATAAATACCATATTTT\n+TGATTGGCTTAAAAATGGCACAAAAAAAAGATAAAAACTAGAGTGTCTAG\n+CTTTAAGTGACAATCGACTCGGAAAATGATCAAGAAATAAATGAAAAATT\n+TGTCTTGCGGCTTTTTGGTCGCAATTCTAAATCGGTTTGTATGTATGTCA\n+CTCTTCATGTATACAATTTGGTGGCTTTGTATTCCACAAAAGTCACAGTA\n+GTCACAAAAGTTTTTTTTAC
TGAAAATTACCATCAAGAGTCTATGGCCAA\n+ATCAGAATATTTTCACTGCGGTTTTTTATTTCAATTCTGCTAAATCGGGT\n+TTCCGTGCCAAAAAGCTTTTGCAGCCAACAACAATATTCAATTTTTTGGC\n+AAAAAATCGACAAAAAAATT\n' |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/trfBig/workflowInputs/dbia3_trfBig_unsorted.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/trfBig/workflowInputs/dbia3_trfBig_unsorted.bed Wed Jul 13 13:36:37 2016 -0400 |
b |
b'@@ -0,0 +1,602 @@\n+contig1\t9130\t9428\ttrf\t163\t1.8\t164\t86\t2\t438\t36\t12\t13\t37\t1.82\tAAAAAAAATTATATCTTCGGTGTTTTTCAACATACAACCTCCTAAGCTTGGAAATAACATTTCTTAATCAGTTCTGAATTTCGAATTAAATTTTTATCAAAATCGGACAACTATACCATATAGCTGTCATAGGAAGGATTGGATAATTAGTGGTAAAATAATAT\n+contig1\t15707\t15757\ttrf\t20\t2.5\t19\t83\t9\t55\t32\t12\t2\t54\t1.49\tTATATCATTTATATCTCAG\n+contig1\t16261\t16302\ttrf\t12\t3.4\t12\t89\t0\t64\t2\t36\t24\t36\t1.69\tTCCGTCTGTCCG\n+contig1\t23387\t23483\ttrf\t45\t2.2\t45\t92\t1\t158\t19\t22\t30\t27\t1.98\tTCGGCGAAGAAATGTGCCACTTCGGCGGCACTTCTTGGAGTCACT\n+contig1\t23451\t23520\ttrf\t24\t2.9\t24\t80\t0\t75\t15\t26\t27\t30\t1.96\tTTCGGCGACACTTCTTGAAGTCAG\n+contig1\t24200\t24233\ttrf\t15\t2.2\t15\t94\t0\t57\t51\t12\t15\t21\t1.75\tAAACAATGGAATGCT\n+contig1\t29159\t29628\ttrf\t162\t2.9\t160\t80\t8\t518\t37\t12\t11\t38\t1.80\tGGAAAACATGAAATAAAAATTATATCTTTCGTGTTTTTTAACATATACCTTCTAAGCTTGAAAATAACATTTTTTATTTGTTCTGAATTTCGAATTAAATTTTATCAAAATCGGACGACTATATCATATAGCTGTCATAGGAACAATCGGAAAATAAGTA\n+contig1\t37571\t37606\ttrf\t8\t4.4\t8\t88\t0\t52\t31\t25\t42\t0\t1.55\tGACGGACA\n+contig1\t38436\t38491\ttrf\t8\t6.9\t8\t79\t8\t58\t0\t38\t27\t34\t1.57\tTGTCCGTC\n+contig1\t38436\t38491\ttrf\t12\t4.6\t12\t81\t0\t65\t0\t38\t27\t34\t1.57\tTGTCCGTCCGTC\n+contig1\t38436\t38491\ttrf\t20\t3.1\t18\t80\t17\t69\t0\t38\t27\t34\t1.57\tTGTCCGTCTGTCCGTCCG\n+contig1\t43116\t43168\ttrf\t2\t26.0\t2\t96\t0\t95\t48\t0\t1\t50\t1.12\tTA\n+contig2\t540\t590\ttrf\t20\t2.5\t19\t83\t9\t55\t32\t12\t2\t54\t1.49\tTATATCATTTATATCTCAG\n+contig2\t1094\t1135\ttrf\t12\t3.4\t12\t89\t0\t64\t2\t36\t24\t36\t1.69\tTCCGTCTGTCCG\n+contig2\t8220\t8316\ttrf\t45\t2.2\t45\t92\t1\t158\t19\t22\t30\t27\t1.98\tTCGGCGAAGAAATGTGCCACTTCGGCGGCACTTCTTGGAGTCACT\n+contig2\t8284\t8353\ttrf\t24\t2.9\t24\t80\t0\t75\t15\t26\t27\t30\t1.96\tTTCGGCGACACTTCTTGAAGTCAG\n+contig2\t9033\t9066\ttrf\t15\t2.2\t15\t94\t0\t57\t51\t12\t15\t21\t1.75\tAAACAATGGAATGCT\n+contig2\t13992\t14461\ttrf\t162\t2.9\t160\t80\t8\t518\t37\t12\t11\t38\t1.80\tGGAAAACATG
AAATAAAAATTATATCTTTCGTGTTTTTTAACATATACCTTCTAAGCTTGAAAATAACATTTTTTATTTGTTCTGAATTTCGAATTAAATTTTATCAAAATCGGACGACTATATCATATAGCTGTCATAGGAACAATCGGAAAATAAGTA\n+contig2\t22404\t22439\ttrf\t8\t4.4\t8\t88\t0\t52\t31\t25\t42\t0\t1.55\tGACGGACA\n+contig2\t23269\t23324\ttrf\t8\t6.9\t8\t79\t8\t58\t0\t38\t27\t34\t1.57\tTGTCCGTC\n+contig2\t23269\t23324\ttrf\t12\t4.6\t12\t81\t0\t65\t0\t38\t27\t34\t1.57\tTGTCCGTCCGTC\n+contig2\t23269\t23324\ttrf\t20\t3.1\t18\t80\t17\t69\t0\t38\t27\t34\t1.57\tTGTCCGTCTGTCCGTCCG\n+contig2\t27949\t28001\ttrf\t2\t26.0\t2\t96\t0\t95\t48\t0\t1\t50\t1.12\tTA\n+contig2\t38280\t38311\ttrf\t16\t1.9\t16\t93\t0\t53\t41\t19\t6\t32\t1.77\tTACATACATACATATG\n+contig3\t3265\t3296\ttrf\t16\t1.9\t16\t93\t0\t53\t41\t19\t6\t32\t1.77\tTACATACATACATATG\n+contig3\t17933\t17958\ttrf\t10\t2.5\t10\t100\t0\t50\t60\t8\t0\t32\t1.26\tATATAAACAT\n+contig3\t19067\t19124\ttrf\t28\t2.0\t28\t100\t0\t114\t43\t14\t10\t31\t1.79\tAATTAAATTTTATCAAAATCGGACGACT\n+contig3\t23368\t23418\ttrf\t2\t25.0\t2\t100\t0\t100\t50\t0\t0\t50\t1.00\tAT\n+contig3\t24889\t25322\ttrf\t160\t2.7\t161\t80\t6\t452\t36\t12\t12\t38\t1.82\tAATATTAAAAATTATATCTTTGGTGTTTTTTAACATATAACCTTCTAAGCTTGGAAATAACATTTTTTATTTGGTTTAATTTCGAATTAAATTTTATTAAAATCGGACGACCATATCATACAGCTCCCATAGAAACAATCGGAAAATTAGTCGGAAACATG\n+contig3\t26816\t26900\ttrf\t18\t5.0\t16\t83\t13\t89\t20\t5\t0\t73\t1.03\tTTTTTTTATATATTTT\n+contig3\t26816\t26887\ttrf\t18\t4.1\t17\t84\t12\t90\t19\t4\t0\t76\t0.96\tTTTTTTTATATATTTTT\n+contig3\t26816\t26900\ttrf\t15\t5.2\t15\t79\t16\t71\t20\t5\t0\t73\t1.03\tTTTTTTTATATATAT\n+contig3\t26820\t26887\ttrf\t17\t3.7\t19\t80\t20\t74\t20\t4\t0\t74\t0.99\tTTTATATATTTTTTTTTCA\n+contig3\t29468\t29515\ttrf\t25\t2.0\t23\t84\t12\t60\t34\t0\t17\t48\t1.47\tATTATAATTATGATGTTATGATG\n+contig3\t38756\t38791\ttrf\t10\t3.6\t10\t92\t7\t63\t40\t0\t0\t60\t0.97\tATTTATTTAA\n+contig3\t38758\t38788\ttrf\t9\t3.2\t9\t95\t4\t51\t40\t0\t0\t60\t0.97\tTTATTTAAA\n+contig4\t7931\t7956\ttrf\t10\t2.5\t10\t100\t0\t50\t60\t8\t0\t32\t1.26\tATATAAACAT\n+contig4\
t9065\t9122\ttrf\t28\t2.0\t28\t100\t0\t114\t43\t14\t10\t31\t1.79\tAATTAAATTTTATCAAAATCGGACGACT\n+contig4\t13366\t13416\ttrf\t2\t25.0\t2\t100\t0\t100\t50\t0\t0\t50\t1.00\tAT\n+contig4\t14887\t15320\ttrf\t160\t2.7\t161\t80\t6\t452\t36\t12\t12\t38\t1.82\tAATATTAAAAATTATATCTTTGGTGTTTTTTAACATATAACCTTCTAAGCTTGGAAATAACATTTTTTATTTGGTTTAATTTCGAATTAAATTTTATTAAAATCGGACGACCATATCATACAGCTCCCATAGAAACAATCGGAAAATTAGTCGGAAACATG\n+contig4\t16814\t16898\ttrf\t18\t5.0\t16\t83\t13\t89\t20\t5\t0\t73\t1.03\tTTTTTTTATATATTTT\n+contig4\t16814\t16885\ttrf\t18\t4.1\t17\t84\t12\t90\t19\t4\t0\t76\t0.96\tTTTTTTTATATATTTTT\n+contig4\t16814\t16898\ttrf\t15\t5.2\t15\t79\t16\t71\t20\t5\t0\t73\t1.03\tTTTTTTTATATATAT'..b'\n+contig69\t18503\t18850\ttrf\t163\t2.1\t163\t88\t3\t502\t36\t12\t16\t35\t1.86\tCTTTGGTGCTATTTGACATATAACCTCCTAAGCTTGGAAATATCATTTTTTAATTGATTTTGAAATTCAAATTAAATTTGATCAAAATCGGACGACTATATCATATAGCTGTCATAGGAACGATCGGAAAATTGGTGGAAAAATAATATGAAACAAATTATAG\n+contig69\t18536\t18888\ttrf\t163\t2.2\t161\t85\t4\t472\t36\t9\t16\t37\t1.82\tTTGGAAATACAATTTTTTATATTTATGAAATTCAAATTAAATTTGATCAAAATCGGACGACTATATCATATAGCTGTCATAGGAACGATCGGAAAATTGGTGGAAAAATAATATGAAACAAATTATAGCTTTGGAGCTGTTTGACATATAACCCTATAAGA\n+contig69\t19781\t19812\ttrf\t15\t2.1\t14\t94\t5\t53\t58\t0\t0\t41\t0.98\tATATAATTATAATA\n+contig69\t23637\t24060\ttrf\t162\t2.7\t156\t83\t6\t496\t37\t11\t12\t38\t1.79\tAAAAAAATTATATCTTCGGTGCTTTTTAACATATAACCTTCTAAGCTTGGAAATAACATTTTTTAATTAGTTCTGAATTTCAATTAAATTTTATCAAAATCGGACGACTTATAGCTGCCATAGGAACGATCGAAAAATTGATGGGAAAATAAATAT\n+contig69\t36863\t36892\ttrf\t15\t1.9\t15\t100\t0\t58\t27\t20\t48\t3\t1.66\tGGGGAACGCGAGCAT\n+contig69\t38645\t39124\ttrf\t158\t3.0\t157\t85\t5\t615\t34\t13\t13\t38\t1.84\tTAAAAATTGTTATTTCCAAGCTTAGAAGGTTATATGTTAAAAAACACCAAGATATAATTTTTTCATATTTTCCGACTATTTTTCCGATCGTTTCTATGGCAGCTATATGATATAGTCGTCCGATTTTGATAAAATTTAATTTGAAATTAAAACCAAT\n+contig69\t43653\t44084\ttrf\t162\t2.7\t162\t87\t4\t586\t40\t10\t13\t35\t1.79\tAACATTTTTTAATTAGTTCTGAATTTCGAATTAAATTTTATCAAAATCGGAAGACTATATCATATAGCTGTCATAGGAACGA
TCGAAAAATTGGTGGAAAATAATATAATAAAAATTATATCCTTGGTATTTTTTAACATATAACAATATAAGCTAGAAAAG\n+contig70\t6910\t6939\ttrf\t15\t1.9\t15\t100\t0\t58\t27\t20\t48\t3\t1.66\tGGGGAACGCGAGCAT\n+contig70\t8692\t9171\ttrf\t158\t3.0\t157\t85\t5\t615\t34\t13\t13\t38\t1.84\tTAAAAATTGTTATTTCCAAGCTTAGAAGGTTATATGTTAAAAAACACCAAGATATAATTTTTTCATATTTTCCGACTATTTTTCCGATCGTTTCTATGGCAGCTATATGATATAGTCGTCCGATTTTGATAAAATTTAATTTGAAATTAAAACCAAT\n+contig70\t13700\t14131\ttrf\t162\t2.7\t162\t87\t4\t586\t40\t10\t13\t35\t1.79\tAACATTTTTTAATTAGTTCTGAATTTCGAATTAAATTTTATCAAAATCGGAAGACTATATCATATAGCTGTCATAGGAACGATCGAAAAATTGGTGGAAAATAATATAATAAAAATTATATCCTTGGTATTTTTTAACATATAACAATATAAGCTAGAAAAG\n+contig70\t17962\t18295\ttrf\t161\t2.1\t160\t83\t5\t413\t37\t12\t13\t35\t1.84\tAAAAAATTATATCTCTGGTGTTTTTAAACATATAACCTCCTAAACTTGGAAATAACATTTTATAATTAGTTCTGAATTTCGAATTAAATTTTATCAAAACCCGACGACTATATCATACATGTAACTGTAACGATCGGAAAATTGATGGGAAAATAATATG\n+contig70\t19030\t19306\ttrf\t137\t2.0\t137\t94\t1\t482\t39\t13\t13\t34\t1.83\tTTTAACACATACCTTTCTAAGCTTGGATATAACATTTTTAAACTGGTTCTGAATTTCAAATTAAATTCAATTAAAATCGGACGACTATATCATATAGCTCCCATAGGAAAAATCGGAAAATTAGTGAGAAAATAATA\n+contig70\t22269\t22308\ttrf\t19\t2.1\t19\t100\t0\t78\t58\t15\t15\t10\t1.62\tAACTAAGGAAATACCAGAA\n+contig70\t22812\t23252\ttrf\t162\t2.8\t156\t84\t5\t562\t37\t11\t13\t37\t1.81\tTTCTAAGCTTGAAATAACATTTTTTAATTAGTTCTGAATTTCGAATTTAATTTTATTAAAATCGGACGACTATATCATATAGCTGCCATAGGAACGATCGGAAAATTAGTGGAAAATAATAAATAAAAATTATATTTGTATTTTTAACATATAACC\n+contig70\t23421\t23486\ttrf\t13\t4.7\t13\t70\t20\t51\t38\t0\t0\t61\t0.96\tATATATATAATTT\n+contig70\t23426\t23479\ttrf\t7\t7.4\t7\t79\t12\t54\t35\t0\t0\t64\t0.94\tTATATTT\n+contig70\t23430\t23486\ttrf\t27\t2.0\t27\t83\t10\t69\t35\t0\t0\t64\t0.94\tATTTATATATTTATTAATATATTTTAT\n+contig70\t25576\t25606\ttrf\t12\t2.5\t12\t94\t0\t51\t33\t26\t36\t3\t1.73\tACGGACGGACAG\n+contig70\t27263\t27382\ttrf\t63\t1.9\t63\t85\t3\t168\t27\t25\t21\t26\t1.99\tATTTTTCCAAGCACTTTCCTGTACAAGGGAAACGTCCCAGGGAAAGCCTATCGGAATTTCAAA\n+contig70\t27353\t27406\ttrf\t25\t2.1\t25\t85\t
0\t70\t30\t26\t32\t11\t1.91\tGGAAACGTCCCAGGGAAAGCCCATC\n+contig70\t27848\t27878\ttrf\t12\t2.5\t12\t88\t0\t51\t30\t30\t40\t0\t1.57\tACGGACGGACAG\n+contig70\t35504\t35713\ttrf\t102\t2.0\t102\t99\t0\t409\t37\t12\t13\t36\t1.82\tAAAACGATGGTAGACAAATATGCATATATTTTTTACACAAAACGAAATATAATGGACTTTTAAAAATTCTTTTTTCTATCTTTCCTGGTGGGAGATATATAT\n+contig70\t43333\t43446\ttrf\t57\t2.0\t57\t94\t0\t199\t30\t17\t22\t29\t1.97\tAATATTGGGAATAACATATTATCTTATAATATGGGAGCGCGAAGGCTCCTCGCCCAT\n+contig70\t44242\t44281\ttrf\t7\t5.7\t7\t81\t6\t53\t28\t20\t12\t38\t1.89\tTACATGT\n+contig70\t44241\t44281\ttrf\t13\t2.9\t14\t85\t7\t55\t27\t22\t12\t37\t1.90\tCTACATGTTACATG\n+contig70\t44485\t44531\ttrf\t14\t3.3\t14\t84\t0\t65\t41\t21\t10\t26\t1.86\tAACCGTATATGACT\n+contig70\t45147\t45181\ttrf\t16\t2.0\t17\t88\t11\t52\t52\t0\t2\t44\t1.16\tAAATATTAGTAATATAT\n+contig70\t46706\t46990\ttrf\t147\t1.9\t147\t100\t0\t568\t39\t10\t9\t40\t1.72\tTTCTTTATTTTTTTTATTTTAAAATACTTAGTACTTAGTAATGTCGCTAAAACCAATATAATATTCTTTAAAATTTAGAAAATATATTCAGACTTCGAATTACAAGGTGTATTAGAAGATAAATTAAACTCTAAAGCTTAATTTATC\n+contig70\t53900\t53978\ttrf\t28\t2.8\t28\t80\t5\t97\t15\t43\t5\t35\t1.69\tCTCTGTCACCCTCTCTTTACCTACCTCA\n' |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/trfBig/workflowInputs/dbia3_trfBig_unsorted_withMaskedGenome.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/trfBig/workflowInputs/dbia3_trfBig_unsorted_withMaskedGenome.fa Wed Jul 13 13:36:37 2016 -0400 |
b |
b'@@ -0,0 +1,66784 @@\n+>contig1\n+aaaactaattttatcaaaatcggacaactatatcatatagctgccatacg\n+aacgatcggaaaattggtaagtaaataattaaaaatattatatctttggt\n+gtgtttttggaaataacattttttaatatgttttgaattttgaattaaat\n+tctatcaaaatcggacgactatatcatatagctgccatacaaacgatcgg\n+aaaattggtgaataaataatatgaaaaaattatatctttggttatttttt\n+agacaaataacctccaacgcttggaaataacattttttaattagttataa\n+atttgatattttaattttatcaaaatcggacgaatatagcatatagctgt\n+taaaataatatgaaacaaattatagctccggtgttttttacatattatct\n+tatactattgggaaaatagtttattatattttaaagaatttccaattaaa\n+ctctaacatatagctttcaaagaaacggtcaaaaaagtaaagaaatcatt\n+tttttttaacatcactgaagctagaaacaatccttaaaaatgtaacatgg\n+tgttagtagcattgaaaattgcttataactgcaaagggaaaacaaacatc\n+ggcttgccgaatgtaatttccattcttgtttgacttgagtttataactta\n+caattatggtaaggtgcctgattttggtttttgccatacggtatgtggta\n+gttatcattttgccgtgctataagtgacatcacagccgattcttttactc\n+cataatgggccaatgtgttgaggcgtttccatccatttattgtttttgta\n+gttaggtcttcatcttgtaatgtgagatgacctcctcttccatgcctcca\n+ttctaagtcaacttcgtgcacagatggtctcatagaaaacggagtatttt\n+taaaaattgcgtccaaaatttttaatttaacttgcgatatggtatccaaa\n+tcgtttacacgacattggactttttcatcaagatcatcttgtaaaatgtg\n+caaaattacaacagaatgggtaacttgttcgtggagaagtcgttcctctg\n+aaagtgaataacgagcatcgtgggttattgcgtccaccaaacccttttca\n+atttgatgcttaattgccttgaacagcaaaaataaattagatccagcata\n+ttcttttaggtagtcgtacatacaaattgctaagtagtttgttaacattt\n+tttcaactacgctctcagtgcgtcgtagcattagctgaggatgcttgctg\n+gcgagcgatttgtcaattaatcgcaataaaagggactttaaaatttccgt\n+cgcatattccattttgttcattagaacaaccataagtaaagaggcgacgt\n+taactcgatcgcgaattgagaaagatgaccgttgagcttctaaagtttct\n+atgaacaatagtaaaaaatatttgtttccaataagttgctcgaattgtat\n+catagctgcatcatagttagtgtgcgggctacttccacaaaattttcggg\n+agtttagaataggatgatctgatacaccgggaaagaaaactttcataatg\n+taattgacgtgatctaacgttggtataccggtgctctccaaatctgctgt\n+tagatcggtcatgtccgtttggagctcagcaaatgcctgtttacactcag\n+aacgaacgttgctttccaatgttatcatctgtatctgaattcgtttgtat\n+tcccgttctgcttgggtcgatttccttctaaatattattagtacgacaac\n+caaaacgatgacaagcactgcaactgtcaatataacgacaaacatggcat\n+gtgaaaaaacatagggtttatttaaatcatatttcaaatatcctatggcg\n+aaac
gaagatttcgccctactttaaccacaactagaggtaaatctgtcga\n+ttgatccacaccattttcatcagttgggagtggttgatgttccggtggaa\n+tgcacaaaagttgagttagtgtaaggcttgttatattgcattgagtagta\n+ccaatggttacattaacgtcgtattcatcagctgccaaatttagtagctc\n+gccttcaataaccaagctgtcacccttgtatagtttaattccgtcatttg\n+gaaatggtaaatattttggatcggctaaataaacaatagtgctcctgata\n+tcatgaaagtatttatttagatctcgaacaagctgtacattgtccattac\n+aaaactaagttgtaaattcagttgcgtttcatggacctttacaaaagttg\n+taacatccatattattattgacgaaatagctaggcgcagttccccctgtt\n+gtgtatatatgaaagttgtctacgaagtctgcttttcgctttcgtctaga\n+atattcgttctgatgcgcactttcatttttaaagtcaaattccgaatcca\n+tcactctgttagaacttttaaatgcttgaaatttatcatttaccggagga\n+gatgggcattccatttgatttgaattgattactacgcaagatgttttgtt\n+tactctttcattatcaaaaaaaacttcaagttcaggcttttgaattgaat\n+taagatacattccatgaacggtcagaacgcgaccaccacttacaaaactg\n+cgtaaaggcttaatctgcattatacgcgggtcttgggtatagttaaaaat\n+agaacagggctgccttggtaacgtacgaaattggtatgaaccgaaattac\n+ttcgagttagatttttattgggtatagttgcgggtgagatatgacattct\n+aaagttcggttggcaccgtcaataagaaggtgtagagaccgtattggttc\n+tggctgggtcgcctctgatgtagtactactaacttgagttgaagaagctt\n+gtgtaacgtttatatgacactcgtactcatctaaatatgctcgcatggtt\n+gatccaatgttcaaaaactttcctattaacgacaattgtgttcctcccga\n+cctgggcccaatcgtgggatataatcctgttagcaaaacattcttaaagt\n+gaaattgtacactagattcagtataccctgcatcatttgcaacctttata\n+ggcgctgacatttcatacatcactgctccagtgcggcattcgatctttac\n+agaaatttggtagttgactagttcacaaggcacagacccaataaatattt\n+ttccacgcacatctccttcgcgaatgcccaaattactgccttcaattgta\n+ataagggtaccaccctcaataggtccagataacggtttaataatatcgat\n+ccttggtaaagggcactcgttttctttagttgtttttgatcctgcactaa\n+tagcacgatcactgtctgctatgcaagtctcgttatatacacatgagttg\n+ctgcaccaagcacattggtattttggatcacgagtgacacacaaactgca\n+atcagcatgatcccgatgtgagcccagtacgtcgcatttatataatgtaa\n+caatcgccgtgtccacatagtgctggaaattccatgtaattacaaccttt\n+gcttggtattcatgtgtattagtctcgtagaaataaggtgttttttcaca\n+aacaacaatcttgtttgactcaatgtgggcaggcaatagcatctgagcag\n+cttcaatatgaactgtacacaagaatccagcgtgagcgcttttgggtttt\n+ggtaagttttctatctctaaacgaatttcttttggcacccgcaccggtaa\n+aagaatcgccggacgattgctttttaaatgggggcattgacca
acactag\n+ttaccgaattttctatattacgacattgtattgattgatggacacattta\n+ttgtcaaaaatacacc'..b'ccactggacccgttggatagcgtc\n+ggcacttttctgtttggtgccattcactggtcactagtaggaccgctctg\n+cctattaaaataggagtgctgcttagaatttatttcagcaagacagccgt\n+aagatatatatgaatagtatttaacggcagtggcagaggcctttacaata\n+taaggctttacgggattttaaacaatgaaaaacaaaaatatttttataga\n+gtcgagataaaatactttttacaataaatactaagtatttttaaaagata\n+aaaatatttaaaagtaaatactgaatattttaacaggtgttaagtactgg\n+gaactgtgactaaggacggcggtatatcgctaccataatatttttaaagg\n+aactgaatactgaaagatgtgaccaaaatggtagaatatcgctaacgaaa\n+atcacagacgcaaatatcgatagtggcctagagacccgatatcgccgccg\n+tgattatcgatgaaactacagctgagctgcttgtttacattctaaagtta\n+aaatttttaaaaatttccaatacaaaaaaaaattcttcgtattaacacgg\n+ctgtttgccggtaagatctgaagtggattcaagttgcagcaccctcagga\n+aaagaataaattattataatataataatataatataaagaatttaaattc\n+gaaccaaacgcggtatgtcactcttcatttagagaatttgtggctttgtt\n+ttgtacaaaaaactattcttttttcttgaaaattagcatcaagaatttat\n+tgccaaaacagaacgcaattctgctaaatctggttaccctgccaaatggc\n+tggccgttctgtgcattcaataacacaagctaacggcttacggcacattc\n+ggaaagatccaaaagaatttctattccggcattttttatgtattatcgtc\n+caatttttgtatgtgtattcattcttttggctgtccgttattaagctttt\n+cttatccacataagctgaccagctgccaaaaagccgaccgttttgtgcag\n+ccaataacccaagctaactatcaacagcaatttcgaaaaatccagaagaa\n+gctatccttcggcaattttttcttatattataatttttttgtcacaattt\n+tttgtcaaaaaatcgacaataattaagtgtttacattttaatgcgattta\n+attggaaatttgaattcgaatcaaacgcggtgtcactctacatatggaca\n+atttgtggctacaaaattagttgaaagtgcatgtttttttttaagtgaaa\n+taaaacaaaaacagcaataacaggccaaaatgttgttttataaataaata\n+cttacatatccgcatataaatatgaatgaaaaaaaaaacagcaagactgg\n+gccaaaatattgttctttaatatgtaaataaatacatatctacatataca\n+tatgtggacaaaagtgttaaaattagttaatagtgcatgttttttaaatg\n+aaaaaataaaacaaaaaaagcaagcaaatataaattatattccacacaca\n+tttacaataatttcaaactacgctaaaaatacccaacagaaaaaacatag\n+aaaggtaaacaaaatacaagaatatcaaaaaaaatctaattttcacgttg\n+ttgtggccggtaccaccggtcatacatacatatagatacatatttagatt\n+ctcacaaaaagaatagaaaattaccgctaatttccttatatacgtttttg\n+ttttcagtgttttgaaatagtatttttcaaacaaaaaacaatacaaaagg\n+gttaaaaaaattgttcgattggctttaacatact
aattaaaaaaaatcac\n+aattttttggcaaaatatcgaaaaaaattaagtgtttacatttttatgcg\n+aattatttggaaatttaaataagaacccaacgcggtttgtcactctacat\n+atggacaatttgtggctttgttttgaacaaaaaatgaatttttttttact\n+caaaattaacaagaagaaacagtaatgcgggttttgatcgcaattctgct\n+aaatcgaatttccgtgcccaaaggctgacctttttgtgcagcaaatagcc\n+caagctaataatctacagcaatttcttaaagaccgaaaatgtatcaactt\n+cggcaaaatatgaaaatttaacatttatttttcaacattttttgccaaat\n+aatcgacaaaaatttcagtgtcccgatttggatgcgaattaattgaggac\n+aattgtgactttgttttgtaaaaaaactattttttttactgaaaattaca\n+atcaagaatctaaggccaaatcagaatattttcactgcggttttttgatc\n+gcaattctgctaaatcgggttccgtgccaaaaagctgactatttttttta\n+ctgagaattagcatcaagaatttattgccaaaacagaacgcaattctgct\n+aaatcgggttgccctgccaaaaggctggccgttttgcgcattcaataaca\n+caagctaacggtttacagcacattcggaaagatccaaaagaatttctatt\n+tcggcattttttatgtattatcgtctaattttggtatgtgtattcattct\n+tttggctgtccgttattaagctgcttttcttattcacatattcaatggcg\n+cagtacataaaaagaatgaaccgtggttttcttgccctctccatctccct\n+ctaccacttcccctctcaacaactatctcacgcactctgtcccttctcgt\n+nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn\n+nnnnnnnnnnnnnnnnnnnnnnnnnnnnattctctctgtcgcgctcgctc\n+tctcaacatgcctttcgcaccgtatctcttcgtttctctgtatgcttgct\n+ctcttaatttaagtttctctttctttgtgttcaatatctttcgcgctcgt\n+gactcctattgaccggactctgagcggtgctcattaagagtggagttttc\n+gatgatggcgggtggcgcgaacgagaattttttacataaaggtaagttgc\n+caattcttttttaaatgtgacatacagcatgcatattttatttattgtaa\n+ttatatgtgaaagaataaaattatatgtacttttatctaatctattgcat\n+ctattttttcacaggtaaacgagtcacacgagaagtaaaacacaaacaca\n+aggatcaaaataaaattcaggtgagtgaacgaacgtgttgaaagtagtta\n+atagtgcatgtctatttaagtgtaagaaacaaaaaagacacaaatgggcc\n+aataattcgtttctttaatagataactgcctatattatgttcaaactatg\n+ctaaaaagacccaaaaggaaaacataaagtatataccttgcaaaatgaat\n+aaaaagaaatcatatatattcatatacgtttgaccggtaccaccggtcat\n+acatacataaacataaacacaaaaagactagaatataaataccatatttt\n+tgattggcttaaaaatggcacaaaaaaaagataaaaactagagtgtctag\n+ctttaagtgacaatcgactcggaaaatgatcaagaaataaatgaaaaatt\n+tgtcttgcggctttttggtcgcaattctaaatcggtttgtatgtatgtca\n+ctcttcatgtatacaatttggtggctttgtattccacaaaagtcacagta\n+gtcacaaaagttttttttac
tgaaaattaccatcaagagtctatggccaa\n+atcagaatattttcactgcggttttttatttcaattctgctaaatcgggt\n+ttccgtgccaaaaagcttttgcagccaacaacaatattcaattttttggc\n+aaaaaatcgacaaaaaaatt\n' |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a todo.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/todo.md Wed Jul 13 13:36:37 2016 -0400 |
[ |
@@ -0,0 +1,47 @@ +# HubArchiveCreator's TODO + +*TODO file inspired by: http://lifehacker.com/why-a-github-gist-is-my-favorite-to-do-list-1493063613* + +### TO COMPLETE + + +- [ ] Don't let the Tool Classes manage the archive (add or remove files / folders) => Everything should pass through TrackHub or another class dedicated to that +- [ ] Move the classes and other program-related files into separate folders +- [ ] Take into account the name of the reference genome / the change: + - [ ] Somebody could want to launch two visualisations of two different genomes. Repeats of Genome with extensions associated +- [ ] Add TDD => First add the test. It should not pass. Implement. It should now pass :) +- [ ] Replace Gff3 by an abstract class GeneralFormat, with two sub-classes GFF3Format and GTFFormat +- [ ] TrackHub should check if the 2bit already exists instead of recreating it (which is the case atm) +- [ ] Manage the error when a user is selecting Generic Bed instead of Bed Simple Repeats. Two options: a. Output a better error message ("Check with the other Bed options") b. 
Identify internally this is not a regular BED but a specific one +- [ ] Remove the non-explicit parameters for the communication between Galaxy Wrapper and the entry point +- [ ] Rename all occurrences of `extension` with `datatype` +- [ ] Follow https://google.github.io/styleguide/pyguide.html +- [ ] Move to Python 3 +- [ ] Remove the repetition of the extension if it already exists +- [ ] Better thinking about the tool_directory management / Classes path refactoring +- [ ] Add a debug mode to have more outputs +- [ ] Improve the standard output of HAC + +### DONE + + +- [x] Each time a file is added => Print it in the output with the full path (or relative path to root) +- [x] Add a script for Linux.x86_64 to download and chmod +x the dependencies for local testing => util/install_linux_binaries.py +- [x] Add sorting BED if not sorted (Use the output of bedToBigBed) +- [x] Add a script to install the huba datatype +- [x] Add the possibility to add a new item in TrackDb.txt through a public function from TrackHub.py => addTrack() in TrackHub.py +- [x] Fix the errors for the stdio regexp not properly processed in error case (always green) => Used `detect_errors` in galaxy wrapper and raise Exception in Python +- [x] Add a class named ~~TrfBigProcess~~ BedSimpleRepeats +- [x] Add a class named TrackHub: Create the base TrackHub hierarchy +- [x] Change the Name of the classes +- [x] Add a class named AugustusProcess: Process the Augustus output to BigBed (and others needed in TrackHub) and (create folders + add the files into the right location => Process can be ported in a class responsible for that) + - [x] Add a class named AugustusProcess + - [x] Process the Augustus output to BigBed + - [x] create folders + add the files into the right location + - [x] Creation of folders to be ported into a separate class => In Datatype.py but should be in a class dedicated to file manipulation + - [x] Refactoring of the AugustusProcess class to behave like a class and 
not like procedural code disguised as a class + - [x] Rename AugustusProcess into something more generic if the process is shared (gtf to BigBed) => Gff3.py +- [x] Use gffToBed for Gtf instead of GtfToGenePred => Cancelled +- [x] Clean the mess with the File handling (sometimes File, sometimes String, sometimes open File) +- [x] Find a way to avoid repetitions in TrackDb and Track (I repeat myself atm) => Track instance has a TrackDb instance as attribute +- [x] Refactor the creation of the structure to TrackHub: Access to paths via this Class, and creation of file through it |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Wed Jul 13 13:36:37 2016 -0400 |
b |
@@ -0,0 +1,94 @@ +<?xml version="1.0"?> +<tool_dependency> + <!-- UCSC Tools in --> + <!-- Useful for HAC are: + - twoBitInfo + - sort + - bedToBigBed + - gff3ToGenePred + - gtfToGenePred + - genePredToBed + - faToTwoBit + - samtools + --> + <package name="ucsc_tools" version="312"> + <repository changeset_revision="2d6bafd63401" name="package_ucsc_tools_312" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> + + <package name="samtools" version="1.2"> + <repository changeset_revision="f6ae3ba3f3c1" name="package_samtools_1_2" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> + + <!-- TODO: Tools to install --> + <!-- twoBitInfo / bedToBigBed / faToTwoBit --> + + + <!-- Package gff3ToGenePred --> + <package name="gff3ToGenePred" version="0.0.1"> + <install version="1.0"> + <actions> + <action target_directory="bin" type="download_binary"> + <url_template os="darwin"> + http://hgdownload.cse.ucsc.edu/admin/exe/macOSX.${architecture}/gff3ToGenePred + </url_template> + <url_template os="linux"> + http://hgdownload.cse.ucsc.edu/admin/exe/linux.${architecture}/gff3ToGenePred + </url_template> + </action> + <action type="chmod"> + <file mode="750">$INSTALL_DIR/bin/gff3ToGenePred</file> + </action> + <action type="set_environment"> + <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable> + </action> + </actions> + </install> + <readme>The tools downloaded by this dependency definition are free for academic use.</readme> + </package> + + <!-- Package gtfToGenePred --> + <package name="gtfToGenePred" version="0.0.1"> + <install version="1.0"> + <actions> + <action target_directory="bin" type="download_binary"> + <url_template os="darwin"> + http://hgdownload.cse.ucsc.edu/admin/exe/macOSX.${architecture}/gtfToGenePred + </url_template> + <url_template os="linux"> + 
http://hgdownload.cse.ucsc.edu/admin/exe/linux.${architecture}/gtfToGenePred + </url_template> + </action> + <action type="chmod"> + <file mode="750">$INSTALL_DIR/bin/gtfToGenePred</file> + </action> + <action type="set_environment"> + <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable> + </action> + </actions> + </install> + <readme>The tools downloaded by this dependency definition are free for academic use.</readme> + </package> + + <!-- Package genePredToBed --> + <package name="genePredToBed" version="0.0.1"> + <install version="1.0"> + <actions> + <action target_directory="bin" type="download_binary"> + <url_template os="darwin"> + http://hgdownload.cse.ucsc.edu/admin/exe/macOSX.${architecture}/genePredToBed + </url_template> + <url_template os="linux"> + http://hgdownload.cse.ucsc.edu/admin/exe/linux.${architecture}/genePredToBed + </url_template> + </action> + <action type="chmod"> + <file mode="750">$INSTALL_DIR/bin/genePredToBed</file> + </action> + <action type="set_environment"> + <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable> + </action> + </actions> + </install> + <readme>The tools downloaded by this dependency definition are free for academic use.</readme> + </package> +</tool_dependency> |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a trackHub/README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/trackHub/README.md Wed Jul 13 13:36:37 2016 -0400 |
[ |
@@ -0,0 +1,12 @@ +This folder exists to configure Galaxy to handle the files generated by the HubArchiveCreator tool + +* The **content** of datatypes_conf.xml should go inside config/datatypes_conf.xml, under \<registration\> (Copy config/datatypes_conf.xml.sample to config/datatypes_conf.xml if it does not exist yet) +* The content of tracks_partial.py should go inside `lib/galaxy/datatypes/tracks.py` +* trackhub.xml should go inside display_applications/ucsc/ + + +TODO: + +- [x] Create a script to copy all these files directly into galaxy => Done but not usable now that I have changed the datatype to match Galaxy IUC +- [ ] Need to modify the script `util/add_datatype.py` to match the changes introduced in https://github.com/galaxyproject/galaxy/pull/2348 +- [x] Create a package in ToolShed that installs this directly when installing HubArchiveCreator => Not recommended by Galaxy IUC and team |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a trackHub/datatypes_conf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/trackHub/datatypes_conf.xml Wed Jul 13 13:36:37 2016 -0400 |
b |
@@ -0,0 +1,3 @@ +<datatype extension="trackhub" type="galaxy.datatypes.tracks:UCSCTrackHub" display_in_upload="true"> + <display file="ucsc/trackhub.xml" /> +</datatype> |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a trackHub/trackhub.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/trackHub/trackhub.xml Wed Jul 13 13:36:37 2016 -0400 |
b |
@@ -0,0 +1,6 @@ +<display id="ucsc_trackhub" version="1.0.0" name="display at Track Hub UCSC"> + <link id="main" name="main"> + <url>https://genome.ucsc.edu/cgi-bin/hgHubConnect?hubUrl=${qp($hub_file.url + '/myHub/hub.txt')}&hgHub_do_firstDb=on&hgHub_do_redirect=on&hgHubConnect.remakeTrackHub=on</url> + <param type="data" name="hub_file" url="galaxy_${DATASET_HASH}" allow_extra_files_access="True" /> + </link> +</display> |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a trackHub/tracks_partial.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/trackHub/tracks_partial.py Wed Jul 13 13:36:37 2016 -0400 |
[ |
"""
HubAssembly datatype
"""
import logging

from galaxy.datatypes.text import Html

log = logging.getLogger( __name__ )

# !!! README !!! The content of this file should be added in tracks.py, but do it carefully!
# Don't erase the existing content


class UCSCTrackHub( Html ):
    """
    Composite datatype (extension ``trackhub``) built on Galaxy's ``Html``
    datatype.

    The primary file is an auto-generated HTML index listing the composite
    files of the dataset (``composite_type = 'auto_primary_file'``).
    """

    file_ext = 'trackhub'
    composite_type = 'auto_primary_file'

    def __init__( self, **kwd ):
        Html.__init__( self, **kwd )

    def generate_primary_file( self, dataset=None ):
        """
        Build the HTML index page listing every composite file.

        This is called only at upload to write the html file; the datasets
        cannot be renamed here - they come with the default names.

        :param dataset: dataset forwarded to ``get_composite_files``
        :return: the HTML page as a single newline-joined string
        """
        rval = [
            '<html><head><title>Files for Composite Dataset (%s)</title></head><p/>\
                This composite dataset is composed of the following files:<p/><ul>' % (
                self.file_ext)]
        # .items() instead of the Python-2-only .iteritems(): same pairs,
        # works on both Python 2 and Python 3 mappings.
        for composite_name, composite_file in self.get_composite_files(dataset=dataset).items():
            opt_text = ''
            if composite_file.optional:
                opt_text = ' (optional)'
            rval.append('<li><a href="%s">%s</a>%s' % (composite_name, composite_name, opt_text))
        rval.append('</ul></html>')
        return "\n".join(rval)

    def set_peek( self, dataset, is_multi_byte=False ):
        """
        Set a fixed peek text, or the "purged" peek/blurb when the underlying
        dataset has been purged.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Track Hub structure: Visualization in UCSC Track Hub"
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek( self, dataset ):
        """
        Return the stored peek, falling back to the default text when reading
        it fails.
        """
        try:
            return dataset.peek
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
            # are no longer swallowed.
            return "Track Hub structure: Visualization in UCSC Track Hub"

    def sniff( self, filename ):
        """Never auto-detect this datatype from file content."""
        return False
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a trf_simpleRepeat.as --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/trf_simpleRepeat.as Wed Jul 13 13:36:37 2016 -0400 |
b |
@@ -0,0 +1,20 @@ +table simpleRepeat +"Describes the Simple Tandem Repeats" + ( + string chrom; "Reference sequence chromosome or scaffold" + uint chromStart; "Start position in chromosome" + uint chromEnd; "End position in chromosome" + string name; "Simple Repeats tag name" + uint period; "Length of repeat unit" + float copyNum; "Mean number of copies of repeat" + uint consensusSize; "Length of consensus sequence" + uint perMatch; "Percentage Match" + uint perIndel; "Percentage Indel" + uint score; "Alignment Score = 2*match-7*mismatch-7*indel; minscore=50" + uint A; "Percent of A's in repeat unit" + uint C; "Percent of C's in repeat unit" + uint G; "Percent of G's in repeat unit" + uint T; "Percent of T's in repeat unit" + float entropy; "Entropy" + lstring sequence; "Sequence of repeat unit element" + ) |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a util/__init__.pyc |
b |
Binary file util/__init__.pyc has changed |
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a util/add_datatype.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/util/add_datatype.py Wed Jul 13 13:36:37 2016 -0400 |
[ |
#!/usr/bin/python

"""
This script copies the huba datatype into your galaxy:
    - Add under <registration>, the datatype_conf
    - Add huba.xml under display_applications/ucsc/
    - Add hubAssembly.py inside lib/galaxy/datatypes
Place yourself in the folder of the python script, and launch it: the source
files are looked up relative to the current working directory
(``../trackHub/...``).
"""

import argparse
import os
import shutil
import sys
import xml.etree.ElementTree as ET


def main(argv):
    """
    Parse the command line and install the three datatype pieces.

    :param argv: full argument vector, program name first (e.g. ``sys.argv``)
    """
    # Command Line parsing init
    parser = argparse.ArgumentParser(
        description='Install the trackhub datatype files into a Galaxy instance.')

    parser.add_argument('-g', '--galaxy_root', help='Galaxy root folder', required=True)

    # Parse the vector we were given (minus the program name) instead of
    # silently falling back to sys.argv.
    args = parser.parse_args(argv[1:])

    galaxy_root_path = args.galaxy_root

    add_datatype_conf(galaxy_root_path)
    add_huba_xml(galaxy_root_path)
    add_hubAssembly(galaxy_root_path)


def add_datatype_conf(galaxy_root_path):
    """
    Append the datatype declared in ``../trackHub/datatypes_conf.xml`` to the
    ``<registration>`` element of ``<galaxy_root>/config/datatypes_conf.xml``.

    If the Galaxy config file does not exist but ``datatypes_conf.xml.sample``
    does, the sample is copied first. If a datatype with the same ``extension``
    is already registered, the file is left untouched.

    :param galaxy_root_path: Galaxy root folder
    :raises ValueError: when the config has no ``<registration>`` element
    """
    print("======= Add datatype =======")
    datatype_conf_path = os.path.join(galaxy_root_path, 'config/datatypes_conf.xml')
    sample_conf_path = datatype_conf_path + '.sample'
    if not os.path.exists(datatype_conf_path) and os.path.exists(sample_conf_path):
        shutil.copy(sample_conf_path, datatype_conf_path)

    tree = ET.parse(datatype_conf_path)
    root = tree.getroot()
    print(root.tag)
    # Look the element up by tag rather than assuming it is the first child.
    registration = root.find('registration')
    if registration is None:
        raise ValueError("No <registration> element found in %s" % datatype_conf_path)
    print(registration.attrib)

    huba_datatype = ET.parse('../trackHub/datatypes_conf.xml').getroot()
    extension = huba_datatype.get('extension')
    # Do not register the same extension twice when the script is re-run.
    for existing in registration.findall('datatype'):
        if existing.get('extension') == extension:
            print("datatype %s already present in %s" % (extension, datatype_conf_path))
            return
    registration.append(huba_datatype)
    tree.write(datatype_conf_path)
    print("datatype added in %s" % datatype_conf_path)


def add_huba_xml(galaxy_root_path):
    """
    Copy the display application XML into
    ``<galaxy_root>/display_applications/ucsc/``.

    :param galaxy_root_path: Galaxy root folder
    """
    print("======= Add hub xml =======")
    displayApp_ucsc_path = os.path.join(galaxy_root_path, "display_applications/ucsc/")
    # NOTE(review): this changeset ships trackHub/trackhub.xml, not huba.xml -
    # confirm which file name is expected before relying on this step.
    shutil.copy("../trackHub/huba.xml", displayApp_ucsc_path)
    print("Content of %s now: %s" % (displayApp_ucsc_path, os.listdir(displayApp_ucsc_path)))


def add_hubAssembly(galaxy_root_path):
    """
    Copy the datatype Python module into ``<galaxy_root>/lib/galaxy/datatypes/``.

    :param galaxy_root_path: Galaxy root folder
    """
    print("======= Add hubAssembly =======")
    datatype_lib_path = os.path.join(galaxy_root_path, "lib/galaxy/datatypes/")
    # NOTE(review): this changeset ships trackHub/tracks_partial.py, not
    # hubAssembly.py - confirm which file name is expected.
    shutil.copy("../trackHub/hubAssembly.py", datatype_lib_path)
    print("Content of %s now: %s" % (datatype_lib_path, os.listdir(datatype_lib_path)))


if __name__ == "__main__":
    main(sys.argv)
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a util/cleanDirectory.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/util/cleanDirectory.py Wed Jul 13 13:36:37 2016 -0400 |
b |
#!/usr/bin/python
"""Use to clean the directory after the run of HubArchiveCreator.py manually"""
import os
import shutil

# Remove 'myHub.zip' at root folder
try:
    os.remove('myHub.zip')
except OSError as o:
    # We don't need to crash the program: a missing archive is only a warning.
    # Single-argument print(...) emits the same text on Python 2 and Python 3.
    print('Warning: ' + str(o))

# Remove 'myHub' folder and its content (silently does nothing if it is absent)
shutil.rmtree('myHub', ignore_errors=True)
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a util/install_linux_binaries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/util/install_linux_binaries Wed Jul 13 13:36:37 2016 -0400 |
[ |
#!/usr/bin/env bash
# Download the UCSC command line binaries needed by HAC into /usr/local/bin
# and make them executable (Linux x86_64 only, requires sudo).

# Stop at the first failing command or unset variable instead of going on to
# chown/chmod a file that was never downloaded.
set -euo pipefail

localBinPath="/usr/local/bin"
hgDownloadURl="http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64"

# Binaries needed for HAC
twoBitInfo="twoBitInfo"
faToTwoBit="faToTwoBit"
gff3ToGenePred="gff3ToGenePred"
gtfToGenePred="gtfToGenePred"
genePredToBed="genePredToBed"
# sort="sort"
bedToBigBed="bedToBigBed"

binariesArray=( "${twoBitInfo}" "${faToTwoBit}" "${gff3ToGenePred}" "${gtfToGenePred}" "${genePredToBed}" "${bedToBigBed}" )

# Download and install binaries
for binaryName in "${binariesArray[@]}"
do
    binaryInstallationPath="${localBinPath}/${binaryName}"
    # -O writes to the exact target path, so re-running the script replaces the
    # binary instead of saving a "<name>.1" copy next to the stale one.
    sudo wget -O "${binaryInstallationPath}" "${hgDownloadURl}/${binaryName}"
    sudo chown "${USER:-$(id -un)}" "${binaryInstallationPath}"
    sudo chmod +x "${binaryInstallationPath}"
done
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a util/subtools.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/util/subtools.py Wed Jul 13 13:36:37 2016 -0400 |
[ |
#!/usr/bin/python
# -*- coding: utf8 -*-

"""
This module handles the subprocess calls of the different tools used
in HubArchiveCreator
"""

import os
import subprocess


def _handleExceptionAndCheckCall(array_call, **kwargs):
    """
    Run an external tool through subprocess.check_call.

    It maps the signature of subprocess.check_call:
    See https://docs.python.org/2/library/subprocess.html#subprocess.check_call

    Every keyword argument (stdin, stdout, stderr, shell, cwd, env, ...) is
    forwarded unchanged; previously only the first four were honoured and the
    others were silently dropped.

    :param array_call: the command and its arguments, as a list
    :return: the return code of the command (0, since non-zero raises)
    :raises subprocess.CalledProcessError: when the command exits non-zero
    """
    return subprocess.check_call(array_call, **kwargs)


def twoBitInfo(two_bit_file_name, two_bit_info_file):
    """
    Call twoBitInfo and write the result into two_bit_info_file
    :param two_bit_file_name: path of the input .2bit file
    :param two_bit_info_file: path of the output file
    :return: the subprocess.check_call return code
    """
    array_call = ['twoBitInfo', two_bit_file_name, two_bit_info_file]
    p = _handleExceptionAndCheckCall(array_call)
    return p


def faToTwoBit(fasta_file_name, mySpecieFolder):
    """
    Call the faToTwoBit UCSC tool and return the twoBit file object.

    The output is named after the fasta file, with its extension replaced by
    ``.2bit``, and is created inside mySpecieFolder.

    :param fasta_file_name: path of the input fasta file
    :param mySpecieFolder: folder receiving the .2bit file
    :return: the (already closed) file object of the .2bit file; its ``name``
        attribute holds the path
    """
    baseNameFasta = os.path.basename(fasta_file_name)
    suffixTwoBit, _ = os.path.splitext(baseNameFasta)
    nameTwoBit = suffixTwoBit + '.2bit'

    # The file is opened only to create it and to get a file object to return;
    # faToTwoBit itself writes the content through the path.
    with open(os.path.join(mySpecieFolder, nameTwoBit), 'w') as twoBitFile:
        array_call = ['faToTwoBit', fasta_file_name, twoBitFile.name]
        _handleExceptionAndCheckCall(array_call)

    return twoBitFile


def gtfToGenePred(input_gtf_file_name, gene_pred_file_name):
    """
    Call gtfToGenePred and write the result into gene_pred_file_name
    :param input_gtf_file_name: path of the input GTF file
    :param gene_pred_file_name: path of the output genePred file
    :return: the subprocess.check_call return code
    """
    array_call = ['gtfToGenePred', input_gtf_file_name, gene_pred_file_name]
    p = _handleExceptionAndCheckCall(array_call)
    return p


def gff3ToGenePred(input_gff3_file_name, gene_pred_file_name):
    """
    Call gff3ToGenePred and write the result into gene_pred_file_name
    :param input_gff3_file_name: path of the input GFF3 file
    :param gene_pred_file_name: path of the output genePred file
    :return: the subprocess.check_call return code
    """
    array_call = ['gff3ToGenePred', input_gff3_file_name, gene_pred_file_name]
    p = _handleExceptionAndCheckCall(array_call)
    return p


def genePredToBed(gene_pred_file_name, unsorted_bed_file_name):
    """
    Call genePredToBed and write the result into unsorted_bed_file_name
    :param gene_pred_file_name: path of the input genePred file
    :param unsorted_bed_file_name: path of the output BED file
    :return: the subprocess.check_call return code
    """
    array_call = ['genePredToBed', gene_pred_file_name, unsorted_bed_file_name]
    p = _handleExceptionAndCheckCall(array_call)
    return p


def sort(unsorted_bed_file_name, sorted_bed_file_name):
    """
    Call sort with -k1,1 -k2,2n on unsorted_bed_file_name and write the result into sorted_bed_file_name
    :param unsorted_bed_file_name: path of the input BED file
    :param sorted_bed_file_name: path of the output BED file
    :return: the subprocess.check_call return code
    """
    array_call = ['sort', '-k', '1,1', '-k', '2,2n', unsorted_bed_file_name, '-o', sorted_bed_file_name]
    p = _handleExceptionAndCheckCall(array_call)
    return p


def sortChromSizes(two_bit_info_file_name, chrom_sizes_file_name):
    """
    Call sort with -k2rn on two_bit_info_file_name and write the result into chrom_sizes_file_name
    :param two_bit_info_file_name: path of the twoBitInfo output
    :param chrom_sizes_file_name: path of the output chrom sizes file
    :return: the subprocess.check_call return code
    """
    array_call = ['sort', '-k2rn', two_bit_info_file_name, '-o', chrom_sizes_file_name]
    p = _handleExceptionAndCheckCall(array_call)
    return p


def bedToBigBed(sorted_bed_file_name, chrom_sizes_file_name, big_bed_file_name, typeOption=None, autoSql=None):
    """
    Call bedToBigBed on sorted_bed_file_name, using chrom_sizes_file_name and write the result into big_bed_file_name
    :param sorted_bed_file_name: path of the sorted input BED file
    :param chrom_sizes_file_name: path of the chrom sizes file
    :param big_bed_file_name: path of the output bigBed file
    :param typeOption: optional extra argument, appended verbatim to the command line
    :param autoSql: optional extra argument, appended verbatim to the command line
    :return: the subprocess.check_call return code
    """
    array_call = ['bedToBigBed', sorted_bed_file_name, chrom_sizes_file_name, big_bed_file_name]
    if typeOption:
        array_call.append(typeOption)
    if autoSql:
        array_call.append(autoSql)

    p = _handleExceptionAndCheckCall(array_call)
    return p


def sortBam(input_bam_file_name, output_sorted_bam_name):
    """
    Call `samtools sort` on input_bam_file_name and output the result in output_sorted_bam_name
    :param input_bam_file_name: path of the input BAM file
    :param output_sorted_bam_name: value passed to the -o option
    :return: the subprocess.check_call return code
    """
    # NOTE(review): the meaning of -o differs between samtools releases -
    # confirm against the samtools version installed by tool_dependencies.xml.
    array_call = ['samtools', 'sort', input_bam_file_name, '-o', output_sorted_bam_name]
    p = _handleExceptionAndCheckCall(array_call)
    return p


def createBamIndex(input_sorted_bam_file_name, output_name_index_name):
    """
    Call `samtools index` on input_sorted_bam_file_name and output the result in output_name_index_name
    :param input_sorted_bam_file_name: path of the sorted input BAM file
    :param output_name_index_name: path of the output index file
    :return: the subprocess.check_call return code
    """
    array_call = ['samtools', 'index', input_sorted_bam_file_name, output_name_index_name]
    p = _handleExceptionAndCheckCall(array_call)
    return p
b |
diff -r 0f3bc17e5ede -r fb5e60d4d18a util/subtools.pyc |
b |
Binary file util/subtools.pyc has changed |