Repository 'hubarchivecreator'
hg clone https://toolshed.g2.bx.psu.edu/repos/rmarenco/hubarchivecreator

Changeset 1:fb5e60d4d18a (2016-07-13)
Previous changeset 0:0f3bc17e5ede (2016-07-13) Next changeset 2:df9e12da0d13 (2016-07-14)
Commit message:
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
added:
Bam.py
Bam.pyc
Bed.py
Bed.pyc
BedSimpleRepeats.py
BedSimpleRepeats.pyc
BigWig.py
BigWig.pyc
Datatype.py
Datatype.pyc
Gff3.py
Gff3.pyc
Gtf.py
Gtf.pyc
Track.py
Track.pyc
TrackDb.py
TrackDb.pyc
TrackHub.py
TrackHub.pyc
__init__.py
hubArchiveCreator.py
templates/__init__.py
templates/genomesAssembly/__init__.py
templates/genomesAssembly/layout.txt
templates/groupsTxt/__init__.py
templates/groupsTxt/layout.txt
templates/hubDescription/__init__.py
templates/hubDescription/layout.txt
templates/hubTxt/__init__.py
templates/hubTxt/layout.txt
templates/specieDescription/__init__.py
templates/specieDescription/layout.txt
templates/trackDb/__init__.py
templates/trackDb/layout.txt
test-data/augustus/hubaInputs/GTF/augustusDbia3.gtf
test-data/augustus/hubaInputs/GTF/dbia3.fa
test-data/augustus/output/augustusDbia3.bb
test-data/augustus/workflowInputs/dbia3.fa
test-data/augustusDbia3.gff3
test-data/augustusOutput.html
test-data/dbia3.fa
test-data/glimmerHMM_output.gff3
test-data/tblastN/dbia3.xml.bb
test-data/tblastN/dbia3.xml.sorted.bed
test-data/tblastN/dbia3.xml.unbb.bed
test-data/tblastN/dbia3.xml.unsorted.bed
test-data/tblastN/hubaInputs/tblastn_Dbia3_ci.xml.sorted.bed
test-data/tblastN/readme/README.html
test-data/tblastN/readme/README.md
test-data/tblastN/workflowInputs/ci.pep
test-data/tblastN/workflowInputs/dbia3.fa
test-data/tblastN/workflowInputs/tblastn_Dbia3_ci.xml
test-data/tblastN/workflowInputs/tblastn_Dbia3_ci.xml.bed
test-data/tblastN/workflowInputs/tblastn_Dbia3_ci.xml.psl
test-data/trfBig/hubaInputs/dbia3.fa
test-data/trfBig/hubaInputs/dbia3_trfBig_sorted.bed
test-data/trfBig/output/dbia3_trfBig.bb
test-data/trfBig/workflowInputs/dbia3.fa
test-data/trfBig/workflowInputs/dbia3.fa.txt
test-data/trfBig/workflowInputs/dbia3_trfBig_unsorted.bed
test-data/trfBig/workflowInputs/dbia3_trfBig_unsorted_withMaskedGenome.fa
todo.md
tool_dependencies.xml
trackHub/README.md
trackHub/datatypes_conf.xml
trackHub/trackhub.xml
trackHub/tracks_partial.py
trf_simpleRepeat.as
util/__init__.py
util/__init__.pyc
util/add_datatype.py
util/cleanDirectory.py
util/install_linux_binaries
util/subtools.py
util/subtools.pyc
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a Bam.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Bam.py Wed Jul 13 13:36:37 2016 -0400
[
@@ -0,0 +1,68 @@
+#!/usr/bin/python
+# -*- coding: utf8 -*-
+
+"""
+Class to handle Bam files to UCSC TrackHub
+"""
+
+import os
+import shutil
+
+from Datatype import Datatype
+from Track import Track
+from TrackDb import TrackDb
+from util import subtools
+
+
+class Bam( Datatype ):
+    """Datatype that publishes a BAM file and its index as a hub track.
+
+    Building an instance copies the BAM file and its index into
+    self.myTrackFolderPath and stores the resulting Track in self.track.
+    """
+
+    def __init__( self, input_bam_false_path, data_bam ,
+                 inputFastaFile, extra_files_path, tool_directory ):
+        """Copy the BAM and its index into the tracks folder and build the Track.
+
+        :param input_bam_false_path: path of the BAM file to copy
+        :param data_bam: mapping read for the keys "name", "order_index"
+            and "index" ("index" is used as the path of the index file to copy)
+        :param inputFastaFile: forwarded to Datatype as input_fasta_file
+        :param extra_files_path: forwarded to Datatype
+        :param tool_directory: forwarded to Datatype
+        """
+        super(Bam, self).__init__( input_fasta_file=inputFastaFile,
+                                   extra_files_path=extra_files_path,
+                                   tool_directory=tool_directory,
+                                   )
+
+        self.track = None
+
+        self.input_bam_false_path = input_bam_false_path
+
+        self.data_bam = data_bam
+        # TODO: Check if it already contains the .bam extension / Do a function in Datatype which check the extension
+        self.name_bam = self.data_bam["name"] + ".bam"
+        self.priority = self.data_bam["order_index"]
+        self.index_bam = self.data_bam["index"]
+
+        # Parenthesised form prints the same text under Python 2 and matches
+        # the print(...) calls at the end of this constructor.
+        print("Creating TrackHub BAM from (falsePath: %s; name: %s)" % ( self.input_bam_false_path, self.name_bam))
+
+        # First: Add the bam file
+        # Second: Add the bam index file, in the same folder (https://genome.ucsc.edu/goldenpath/help/bam.html)
+
+        bam_file_path = os.path.join(self.myTrackFolderPath, self.name_bam)
+        shutil.copyfile(self.input_bam_false_path, bam_file_path)
+
+        # Create and add the bam index file to the same folder
+        name_index_bam = self.name_bam + ".bai"
+        bam_index_file_path = os.path.join(self.myTrackFolderPath, name_index_bam)
+        shutil.copyfile(self.index_bam, bam_index_file_path)
+
+        # Create the Track Object
+        dataURL = "tracks/%s" % self.name_bam
+
+        trackDb = TrackDb(
+            trackName=self.name_bam,
+            longLabel=self.name_bam,
+            shortLabel=self.getShortName( self.name_bam ),
+            trackDataURL=dataURL,
+            trackType='bam',
+            visibility='pack',
+            priority=self.priority,
+        )
+
+        # Store the Bam Track Object
+        # NOTE(review): trackFile receives the index path, whereas the other
+        # datatypes of this changeset pass the path of the data file itself.
+        # Left unchanged here; confirm which one is intended.
+        self.track = Track(
+            trackFile=bam_index_file_path,
+            trackDb=trackDb,
+        )
+
+        print("- %s created in %s" % (self.name_bam, bam_file_path))
+        # Report the name of the created index file, not the path it was copied from
+        print("- %s created in %s" % (name_index_bam, bam_index_file_path))
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a Bam.pyc
b
Binary file Bam.pyc has changed
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a Bed.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Bed.py Wed Jul 13 13:36:37 2016 -0400
[
@@ -0,0 +1,72 @@
+#!/usr/bin/python
+
+import os
+import tempfile
+
+# Internal dependencies
+from Datatype import Datatype
+from Track import Track
+from TrackDb import TrackDb
+from util import subtools
+
+
+class Bed( Datatype ):
+    """Datatype that turns a generic BED file into a bigBed hub track.
+
+    The work is done in the constructor through the subtools helpers
+    (sort, twoBitInfo, sortChromSizes, bedToBigBed); the resulting
+    Track is stored in self.track.
+    """
+
+    def __init__( self, inputBedGeneric, data_bed_generic,
+                 inputFastaFile, extra_files_path, tool_directory ):
+        """Build the bigBed file in self.myTrackFolderPath and the Track.
+
+        :param inputBedGeneric: path of the BED file handed to subtools.sort
+        :param data_bed_generic: mapping read for the keys "name" and "order_index"
+        :param inputFastaFile: forwarded to Datatype as input_fasta_file
+        :param extra_files_path: forwarded to Datatype
+        :param tool_directory: forwarded to Datatype
+        """
+        super(Bed, self).__init__(
+            input_fasta_file=inputFastaFile,
+            extra_files_path=extra_files_path,
+            tool_directory=tool_directory,
+        )
+
+        self.track = None
+        self.inputBedGeneric = inputBedGeneric
+
+        # Intermediate files are kept on the instance
+        self.sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed")
+        self.chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes")
+        self.twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0)
+
+        self.data_bed_generic = data_bed_generic
+        self.name_bed_generic = data_bed_generic["name"]
+        self.priority = data_bed_generic["order_index"]
+
+        # Step 1: sort the input bed
+        subtools.sort(self.inputBedGeneric, self.sortedBedFile.name)
+
+        # Step 2: chrom.sizes, obtained from the twoBit infos
+        # TODO: Isolate in a function
+        # TODO: Check if no errors
+        subtools.twoBitInfo(self.twoBitFile.name, self.twoBitInfoFile.name)
+        subtools.sortChromSizes(self.twoBitInfoFile.name, self.chromSizesFile.name)
+
+        # Step 3: bedToBigBed
+        # TODO: Change the name of the bb, to tool + genome + possible adding if multiple +  .bb
+        big_bed_name = self.name_bed_generic + ".bb"
+        big_bed_path = os.path.join(self.myTrackFolderPath, big_bed_name)
+        with open(big_bed_path, 'w') as self.bigBedFile:
+            subtools.bedToBigBed(self.sortedBedFile.name,
+                                 self.chromSizesFile.name,
+                                 self.bigBedFile.name)
+
+        # Step 4: describe the track and keep it on the instance
+        track_db = TrackDb(
+            trackName=big_bed_name,
+            longLabel=self.name_bed_generic,
+            shortLabel=self.getShortName(self.name_bed_generic),
+            trackDataURL="tracks/%s" % big_bed_name,
+            trackType='bigBed',
+            visibility='dense',
+            thickDrawItem='on',
+            priority=self.priority,
+        )
+        self.track = Track(trackFile=big_bed_path, trackDb=track_db)
+
+        print("- %s created in %s" % (big_bed_name, big_bed_path))
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a Bed.pyc
b
Binary file Bed.pyc has changed
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a BedSimpleRepeats.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BedSimpleRepeats.py Wed Jul 13 13:36:37 2016 -0400
[
@@ -0,0 +1,68 @@
+#!/usr/bin/python
+
+import os
+import tempfile
+
+from Datatype import Datatype
+from Track import Track
+from TrackDb import TrackDb
+from util import subtools
+
+
+class BedSimpleRepeats( Datatype ):
+    """Datatype that turns a simple-repeats BED file into a bigBed hub track.
+
+    The conversion uses the autoSql file trf_simpleRepeat.as found in the
+    tool directory and the '-type=bed4+12' option of subtools.bedToBigBed.
+    The resulting Track is stored in self.track.
+    """
+
+    def __init__( self, input_bed_simple_repeats_false_path, data_bed_simple_repeats,
+                 input_fasta_file, extra_files_path, tool_directory ):
+        """Build the bigBed file in self.myTrackFolderPath and the Track.
+
+        :param input_bed_simple_repeats_false_path: path of the BED file handed to subtools.sort
+        :param data_bed_simple_repeats: mapping read for the keys "name" and "order_index"
+        :param input_fasta_file: forwarded to Datatype
+        :param extra_files_path: forwarded to Datatype
+        :param tool_directory: forwarded to Datatype
+        """
+        super(BedSimpleRepeats, self).__init__(
+            input_fasta_file=input_fasta_file,
+            extra_files_path=extra_files_path,
+            tool_directory=tool_directory,
+        )
+
+        self.input_bed_simple_repeats_false_path = input_bed_simple_repeats_false_path
+        self.name_bed_simple_repeats = data_bed_simple_repeats["name"]
+        self.priority = data_bed_simple_repeats["order_index"]
+
+        # Intermediate files
+        sorted_bed = tempfile.NamedTemporaryFile(suffix=".sortedBed")
+        two_bit_info = tempfile.NamedTemporaryFile(bufsize=0)
+        chrom_sizes = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes")
+
+        # Sort the input bed
+        subtools.sort(self.input_bed_simple_repeats_false_path, sorted_bed.name)
+
+        # TODO: Regroup in an mother class which handles the Chrom.sizes creation with Gff3 and Gtf
+        # TODO: Check if no errors
+        # chrom.sizes is derived from the twoBit infos
+        subtools.twoBitInfo(self.twoBitFile.name, two_bit_info.name)
+        subtools.sortChromSizes(two_bit_info.name, chrom_sizes.name)
+
+        # bedToBigBed, with the autoSql description shipped in the tool directory
+        # TODO: Change the name of the bb, to tool + genome + .bb
+        big_bed_name = self.name_bed_simple_repeats + '.bb'
+        big_bed_path = os.path.join(self.myTrackFolderPath, big_bed_name)
+        as_file_path = os.path.join(self.tool_directory, 'trf_simpleRepeat.as')
+        auto_sql_option = '-as=' + as_file_path
+        with open(big_bed_path, 'w') as big_bed_handle:
+            subtools.bedToBigBed(
+                sorted_bed.name,
+                chrom_sizes.name,
+                big_bed_handle.name,
+                typeOption='-type=bed4+12',
+                autoSql=auto_sql_option,
+            )
+
+        # Describe the track and keep it on the instance
+        track_db = TrackDb(
+            trackName=big_bed_name,
+            longLabel=self.name_bed_simple_repeats,
+            shortLabel=self.getShortName( self.name_bed_simple_repeats ),
+            trackDataURL="tracks/%s" % big_bed_name,
+            trackType='bigBed 4 +',
+            visibility='dense',
+            priority=self.priority,
+        )
+        self.track = Track(trackFile=big_bed_path, trackDb=track_db)
+
+        print("- %s created in %s" % (big_bed_name, big_bed_path))
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a BedSimpleRepeats.pyc
b
Binary file BedSimpleRepeats.pyc has changed
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a BigWig.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BigWig.py Wed Jul 13 13:36:37 2016 -0400
[
@@ -0,0 +1,51 @@
+#!/usr/bin/python
+
+import os
+import shutil
+
+# Internal dependencies
+from Datatype import Datatype
+from Track import Track
+from TrackDb import TrackDb
+
+
+class BigWig( Datatype ):
+    """Datatype that publishes a BigWig file as a hub track.
+
+    Building an instance copies the file into self.myTrackFolderPath
+    under the name "<name>.bigwig" and stores the Track in self.track.
+    """
+
+    def __init__(self, input_bigwig_path, data_bigwig,
+                 input_fasta_path, extra_files_path, tool_directory):
+        """Copy the BigWig file into the tracks folder and build the Track.
+
+        :param input_bigwig_path: path of the BigWig file to copy
+        :param data_bigwig: mapping read for the keys "name" and "order_index"
+        :param input_fasta_path: forwarded to Datatype as input_fasta_file
+        :param extra_files_path: forwarded to Datatype
+        :param tool_directory: forwarded to Datatype
+        """
+        super(BigWig, self).__init__(
+                input_fasta_path, extra_files_path, tool_directory
+        )
+
+        self.track = None
+
+        self.input_bigwig_path = input_bigwig_path
+        self.name_bigwig = data_bigwig["name"]
+        self.priority = data_bigwig["order_index"]
+
+        # Parenthesised form prints the same text under Python 2 and matches
+        # the print(...) call at the end of this constructor.
+        print("Creating TrackHub BigWig from (falsePath: %s; name: %s)" % ( self.input_bigwig_path, self.name_bigwig ))
+
+        trackName = "".join( ( self.name_bigwig, ".bigwig" ) )
+
+        myBigWigFilePath = os.path.join(self.myTrackFolderPath, trackName)
+        shutil.copy(self.input_bigwig_path, myBigWigFilePath)
+
+        # Create the Track Object
+        dataURL = "tracks/%s" % trackName
+
+        # Describe the BigWig track
+        trackDb = TrackDb(
+            trackName=trackName,
+            longLabel=self.name_bigwig,
+            shortLabel=self.getShortName( self.name_bigwig ),
+            trackDataURL=dataURL,
+            trackType='bigWig',
+            visibility='full',
+            priority=self.priority,
+        )
+
+        self.track = Track(
+            trackFile=myBigWigFilePath,
+            trackDb=trackDb,
+        )
+
+        print("- %s created in %s" % (trackName, myBigWigFilePath))
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a BigWig.pyc
b
Binary file BigWig.pyc has changed
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a Datatype.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Datatype.py Wed Jul 13 13:36:37 2016 -0400
[
@@ -0,0 +1,36 @@
+#!/usr/bin/python
+# -*- coding: utf8 -*-
+
+"""
+Super Class of the managed datatype
+"""
+
+import os
+
+from util import subtools
+
+
+class Datatype(object):
+    """Base class of the datatypes handled by the tool.
+
+    It stores the constructor arguments, computes the species and tracks
+    folder paths under extra_files_path, and asks subtools.faToTwoBit for
+    the 2bit file of the input fasta.
+    """
+
+    def __init__( self, input_fasta_file, extra_files_path, tool_directory ):
+        """Record the inputs and prepare the paths shared by every datatype.
+
+        :param input_fasta_file: fasta handed to subtools.faToTwoBit
+        :param extra_files_path: root under which "myHub/dbia3" is located
+        :param tool_directory: stored as-is on the instance
+        """
+        self.input_fasta_file = input_fasta_file
+        self.extra_files_path = extra_files_path
+        self.tool_directory = tool_directory
+
+        # Construction of the arborescence
+        # TODO: Change the hard-coded path with a input based one
+        specie_folder = os.path.join(extra_files_path, "myHub", "dbia3")
+        self.mySpecieFolderPath = specie_folder
+
+        # TODO: Refactor the name of the folder "tracks" into one variable, and should be inside TrackHub object
+        self.myTrackFolderPath = os.path.join(specie_folder, "tracks")
+
+        # TODO: Redundant, should be refactored because they are all doing it...into hubArchiveCreator?
+        # 2bit file creation from input fasta
+        self.twoBitFile = subtools.faToTwoBit(input_fasta_file, specie_folder)
+
+    def getShortName( self, name_to_shortify ):
+        """Return the first 15 items of name_to_shortify, used as short label."""
+        return name_to_shortify[0:15]
\ No newline at end of file
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a Datatype.pyc
b
Binary file Datatype.pyc has changed
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a Gff3.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Gff3.py Wed Jul 13 13:36:37 2016 -0400
[
@@ -0,0 +1,77 @@
+#!/usr/bin/python
+
+import os
+import tempfile
+
+# Internal dependencies
+from Datatype import Datatype
+from Track import Track
+from TrackDb import TrackDb
+from util import subtools
+
+
+class Gff3( Datatype ):
+    """Datatype that turns a GFF3 file into a bigBed hub track.
+
+    The constructor chains the subtools helpers gff3ToGenePred,
+    genePredToBed, sort, twoBitInfo, sortChromSizes and bedToBigBed,
+    then stores the resulting Track in self.track.
+    """
+
+    def __init__( self, input_Gff3_false_path, data_gff3,
+                  input_fasta_false_path, extra_files_path, tool_directory ):
+        """Build the bigBed file in self.myTrackFolderPath and the Track.
+
+        :param input_Gff3_false_path: path of the GFF3 file handed to subtools.gff3ToGenePred
+        :param data_gff3: mapping read for the keys "name" and "order_index"
+        :param input_fasta_false_path: forwarded to Datatype as input_fasta_file
+        :param extra_files_path: forwarded to Datatype
+        :param tool_directory: forwarded to Datatype
+        """
+        super( Gff3, self ).__init__(
+            input_fasta_file=input_fasta_false_path,
+            extra_files_path=extra_files_path,
+            tool_directory=tool_directory,
+        )
+
+        self.track = None
+        self.input_Gff3_false_path = input_Gff3_false_path
+        self.name_gff3 = data_gff3["name"]
+        self.priority = data_gff3["order_index"]
+
+        # Intermediate files
+        # TODO: See if we need these temporary files as part of the generated files
+        gene_pred = tempfile.NamedTemporaryFile(bufsize=0, suffix=".genePred")
+        unsorted_bed = tempfile.NamedTemporaryFile(bufsize=0, suffix=".unsortedBed")
+        sorted_bed = tempfile.NamedTemporaryFile(suffix=".sortedBed")
+        # TODO: Refactor into another Class to manage the twoBitInfo and ChromSizes (same process as in Gtf.py)
+        two_bit_info = tempfile.NamedTemporaryFile(bufsize=0)
+        chrom_sizes = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes")
+
+        # gff3 -> genePred -> bed -> sorted bed
+        # TODO: From genePredToBed on, refactor because common use with Gtf.py
+        subtools.gff3ToGenePred(self.input_Gff3_false_path, gene_pred.name)
+        subtools.genePredToBed(gene_pred.name, unsorted_bed.name)
+        subtools.sort(unsorted_bed.name, sorted_bed.name)
+
+        # twoBitInfo, then chromSizes out of it
+        # TODO: Check if no errors
+        subtools.twoBitInfo(self.twoBitFile.name, two_bit_info.name)
+        subtools.sortChromSizes(two_bit_info.name, chrom_sizes.name)
+
+        # bedToBigBed
+        # TODO: Change the name of the bb, to tool + genome + possible adding if multiple +  .bb
+        big_bed_name = self.name_gff3 + ".bb"
+        big_bed_path = os.path.join(self.myTrackFolderPath, big_bed_name)
+        with open(big_bed_path, 'w') as big_bed_handle:
+            subtools.bedToBigBed(sorted_bed.name, chrom_sizes.name, big_bed_handle.name)
+
+        # Describe the track and keep it on the instance
+        track_db = TrackDb(
+            trackName=big_bed_name,
+            longLabel=self.name_gff3,
+            shortLabel=self.getShortName( self.name_gff3 ),
+            trackDataURL="tracks/%s" % big_bed_name,
+            trackType='bigBed 12 +',
+            visibility='dense',
+            priority=self.priority,
+        )
+        self.track = Track(trackFile=big_bed_path, trackDb=track_db)
+
+        print("- %s created in %s" % (big_bed_name, big_bed_path))
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a Gff3.pyc
b
Binary file Gff3.pyc has changed
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a Gtf.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Gtf.py Wed Jul 13 13:36:37 2016 -0400
[
@@ -0,0 +1,78 @@
+#!/usr/bin/python
+
+import os
+import tempfile
+
+# Internal dependencies
+from Datatype import Datatype
+from Track import Track
+from TrackDb import TrackDb
+from util import subtools
+
+
+class Gtf( Datatype ):
+    """Datatype that turns a GTF file into a bigBed hub track.
+
+    The constructor chains the subtools helpers gtfToGenePred,
+    genePredToBed, sort, twoBitInfo, sortChromSizes and bedToBigBed,
+    then stores the resulting Track in self.track.
+    """
+
+    def __init__( self, input_gtf_false_path, data_gtf,
+                 input_fasta_file, extra_files_path, tool_directory ):
+        """Build the bigBed file in self.myTrackFolderPath and the Track.
+
+        :param input_gtf_false_path: path of the GTF file handed to subtools.gtfToGenePred
+        :param data_gtf: mapping read for the keys "name" and "order_index"
+        :param input_fasta_file: forwarded to Datatype
+        :param extra_files_path: forwarded to Datatype
+        :param tool_directory: forwarded to Datatype
+        """
+        super(Gtf, self).__init__( input_fasta_file=input_fasta_file,
+                                   extra_files_path=extra_files_path,
+                                   tool_directory=tool_directory )
+
+        self.track = None
+
+        self.input_gtf_false_path = input_gtf_false_path
+        self.name_gtf = data_gtf["name"]
+        self.priority = data_gtf["order_index"]
+
+        # Parenthesised form prints the same text under Python 2 and matches
+        # the print(...) call at the end of this constructor.
+        print("Creating TrackHub GTF from (falsePath: %s; name: %s)" % ( self.input_gtf_false_path, self.name_gtf))
+
+        # TODO: See if we need these temporary files as part of the generated files
+        genePredFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".genePred")
+        unsortedBedFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".unsortedBed")
+        sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed")
+
+        twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0)
+        chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes")
+
+        # GtfToGenePred
+        subtools.gtfToGenePred(self.input_gtf_false_path, genePredFile.name)
+
+        # TODO: From there, refactor because common use with Gff3.py
+        #  genePredToBed processing
+        subtools.genePredToBed(genePredFile.name, unsortedBedFile.name)
+
+        # Sort processing
+        subtools.sort(unsortedBedFile.name, sortedBedFile.name)
+
+        # TODO: Check if the twoBitInfo / ChromSizes is redundant and make an intermediate class
+        # Generate the twoBitInfo
+        subtools.twoBitInfo(self.twoBitFile.name, twoBitInfoFile.name)
+
+        # Then we get the output to generate the chromSizes
+        # TODO: Check if no errors
+        subtools.sortChromSizes(twoBitInfoFile.name, chromSizesFile.name)
+
+        # bedToBigBed processing
+        # TODO: Change the name of the bb, to tool + genome + possible adding if multiple +  .bb
+        trackName = "".join( ( self.name_gtf, ".bb") )
+        myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName)
+        with open(myBigBedFilePath, 'w') as bigBedFile:
+            subtools.bedToBigBed(sortedBedFile.name, chromSizesFile.name, bigBedFile.name)
+
+        # Create the Track Object
+        dataURL = "tracks/%s" % trackName
+
+        trackDb = TrackDb(
+            trackName=trackName,
+            longLabel=self.name_gtf,
+            shortLabel=self.getShortName( self.name_gtf ),
+            trackDataURL=dataURL,
+            trackType='bigBed 12 +',
+            visibility='dense',
+            priority=self.priority,
+        )
+        self.track = Track(
+            trackFile=myBigBedFilePath,
+            trackDb=trackDb,
+        )
+
+        print("- %s created in %s" % (trackName, myBigBedFilePath))
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a Gtf.pyc
b
Binary file Gtf.pyc has changed
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a Track.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Track.py Wed Jul 13 13:36:37 2016 -0400
b
@@ -0,0 +1,10 @@
+#!/usr/bin/python
+
+
+class Track(object):
+    """Pair a track file with the TrackDb entry that describes it."""
+
+    def __init__(self, trackFile=None, trackDb=None):
+        """Store both arguments unchanged; each defaults to None."""
+        self.trackFile, self.trackDb = trackFile, trackDb
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a Track.pyc
b
Binary file Track.pyc has changed
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a TrackDb.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/TrackDb.py Wed Jul 13 13:36:37 2016 -0400
b
@@ -0,0 +1,17 @@
+#!/usr/bin/python
+
+class TrackDb(object):
+    """Plain holder of the values describing one track entry."""
+
+    def __init__(self, trackName="", longLabel="", shortLabel="", trackDataURL="", trackType="", visibility="",
+                 thickDrawItem='off', priority="0"):
+        """Store every argument on the instance under the same name."""
+        super(TrackDb, self).__init__()
+
+        # Naming
+        self.trackName, self.longLabel, self.shortLabel = trackName, longLabel, shortLabel
+
+        # Data location and type
+        self.trackDataURL, self.trackType = trackDataURL, trackType
+
+        # Display settings
+        self.visibility, self.thickDrawItem, self.priority = visibility, thickDrawItem, priority
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a TrackDb.pyc
b
Binary file TrackDb.pyc has changed
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a TrackHub.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/TrackHub.py Wed Jul 13 13:36:37 2016 -0400
[
b'@@ -0,0 +1,218 @@\n+#!/usr/bin/python\n+# -*- coding: utf8 -*-\n+\n+import os\n+import zipfile\n+\n+from mako.lookup import TemplateLookup\n+\n+\n+class TrackHub(object):\n+    """docstring for TrackHub"""\n+\n+    def __init__(self, inputFastaFile, outputFile, extra_files_path, tool_directory):\n+        super(TrackHub, self).__init__()\n+\n+        self.rootAssemblyHub = None\n+        self.mySpecieFolderPath = None\n+        self.tool_directory = tool_directory\n+\n+        # TODO: Modify according to the files passed in parameter\n+        mylookup = TemplateLookup(directories=[os.path.join(tool_directory, \'templates/trackDb\')],\n+                                  output_encoding=\'utf-8\', encoding_errors=\'replace\')\n+        self.trackDbTemplate = mylookup.get_template("layout.txt")\n+\n+        self.extra_files_path = extra_files_path\n+        self.outputFile = outputFile\n+\n+        inputFastaFile = open(inputFastaFile, \'r\')\n+        self.outputZip = zipfile.ZipFile(os.path.join(extra_files_path, \'myHub.zip\'), \'w\')\n+\n+        # Create the structure of the Assembly Hub\n+        # TODO: Merge the following processing into a function as it is also used in twoBitCreator\n+        baseNameFasta = os.path.basename(inputFastaFile.name)\n+        suffixTwoBit, extensionTwoBit = os.path.splitext(baseNameFasta)\n+        self.twoBitName = suffixTwoBit + \'.2bit\'\n+\n+        self.rootAssemblyHub = self.__createAssemblyHub__(toolDirectory=tool_directory,\n+                                                          extra_files_path=extra_files_path)\n+\n+    def createZip(self):\n+        for root, dirs, files in os.walk(self.rootAssemblyHub):\n+            # Get all files and construct the dir at the same time\n+            for file in files:\n+                self.outputZip.write(os.path.join(root, file))\n+\n+        self.outputZip.close()\n+\n+    def addTrack(self, trackDbObject=None):\n+        # Create the trackDb.txt file in the specie folder, 
if not exists\n+        # Else append the new track\n+        trackDbTxtFilePath = os.path.join(self.mySpecieFolderPath, \'trackDb.txt\')\n+\n+        # Append to trackDbTxtFilePath the trackDbTemplate populate with the newTrack object\n+        with open(trackDbTxtFilePath, \'a+\') as trackDbFile:\n+            trackDbs = [trackDbObject]\n+            htmlMakoRendered = self.trackDbTemplate.render(\n+                trackDbs=trackDbs\n+            )\n+            trackDbFile.write(htmlMakoRendered)\n+\n+    def terminate(self):\n+        # Just a test to output a simple HTML\n+        with open(self.outputFile, \'w\') as htmlOutput:\n+            htmlOutput.write(\'<html>\')\n+            htmlOutput.write(\'<body>\')\n+            htmlOutput.write(\'<p>\')\n+            htmlOutput.write(\'The following generated by Hub Archive Creator:\')\n+            htmlOutput.write(\'</p>\')\n+            htmlOutput.write(\'<ul>\')\n+            for root, dirs, files in os.walk(self.extra_files_path):\n+                for file in files:\n+                    relDir = os.path.relpath(root, self.extra_files_path)\n+                    htmlOutput.write(str.format(\'<li><a href="{0}">{1}</a></li>\', os.path.join(relDir, file),\n+                                                os.path.join(relDir, file)))\n+            htmlOutput.write(\'<ul>\')\n+            htmlOutput.write(\'</body>\')\n+            htmlOutput.write(\'</html>\')\n+\n+    def __createAssemblyHub__(self, toolDirectory, extra_files_path):\n+        # TODO: Manage to put every fill Function in a file dedicated for reading reasons\n+        # Create the root directory\n+        myHubPath = os.path.join(extra_files_path, "myHub")\n+        if not os.path.exists(myHubPath):\n+            os.makedirs(myHubPath)\n+\n+        # Add the genomes.txt file\n+        genomesTxtFilePath = os.path.join(myHubPath, \'genomes.txt\')\n+        self.__fillGenomesTxt__(genomesTxtFilePath, toolDirectory)\n+\n+        # Add the hub.txt 
file\n+        hubTxtFilePath = os.path.join(myHubPath, \'hub.txt\')\n+        self.__fillHubTxt__(hubTxtFilePath, toolDirectory)\n+\n+    '..b'ription="dbia3/description.html"\n+            )\n+            genomesTxtFile.write(htmlMakoRendered)\n+\n+    def __fillHubTxt__(self, hubTxtFilePath, toolDirectory):\n+        # TODO: Think about the inputs and outputs\n+        # TODO: Manage the template of this file\n+        mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, \'templates/hubTxt\')],\n+                                  output_encoding=\'utf-8\', encoding_errors=\'replace\')\n+        mytemplate = mylookup.get_template(\'layout.txt\')\n+        with open(hubTxtFilePath, \'w\') as genomesTxtFile:\n+            # Write the content of the file genomes.txt\n+            htmlMakoRendered = mytemplate.render(\n+                hubName=\'dbiaOnly\',\n+                shortLabel=\'dbia\',\n+                longLabel=\'This hub only contains dbia with the gene predictions\',\n+                genomesFile=\'genomes.txt\',\n+                email=\'rmarenco@gwu.edu\',\n+                descriptionUrl=\'dbia.html\'\n+            )\n+            genomesTxtFile.write(htmlMakoRendered)\n+\n+    def __fillHubHtmlFile__(self, hubHtmlFilePath, toolDirectory):\n+        # TODO: Think about the inputs and outputs\n+        # TODO: Manage the template of this file\n+        # renderer = pystache.Renderer(search_dirs="templates/hubDescription")\n+        # t = Template(templates.hubDescription.layout.html)\n+        mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, \'templates/hubDescription\')],\n+                                  output_encoding=\'utf-8\', encoding_errors=\'replace\')\n+        mytemplate = mylookup.get_template("layout.txt")\n+        with open(hubHtmlFilePath, \'w\') as hubHtmlFile:\n+            # Write the content of the file genomes.txt\n+            # htmlPystached = renderer.render_name(\n+            #     
"layout",\n+            #     {\'specie\': \'Dbia\',\n+            #     \'toolUsed\': \'Augustus\',\n+            #     \'ncbiSpecieUrl\': \'http://www.ncbi.nlm.nih.gov/genome/3499\',\n+            #     \'genomeID\': \'3499\',\n+            #     \'SpecieFullName\': \'Drosophila biarmipes\'})\n+            htmlMakoRendered = mytemplate.render(\n+                specie=\'Dbia\',\n+                toolUsed=\'Augustus\',\n+                ncbiSpecieUrl=\'http://www.ncbi.nlm.nih.gov/genome/3499\',\n+                genomeID=\'3499\',\n+                specieFullName=\'Drosophila biarmipes\'\n+            )\n+            # hubHtmlFile.write(htmlPystached)\n+            hubHtmlFile.write(htmlMakoRendered)\n+\n+    def __fillDescriptionHtmlFile__(self, descriptionHtmlFilePath, toolDirectory):\n+        # TODO: Think about the inputs and outputs\n+        # TODO: Manage the template of this file\n+        mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, \'templates/specieDescription\')],\n+                                  output_encoding=\'utf-8\', encoding_errors=\'replace\')\n+        mytemplate = mylookup.get_template("layout.txt")\n+        with open(descriptionHtmlFilePath, \'w\') as descriptionHtmlFile:\n+            # Write the content of the file genomes.txt\n+            htmlMakoRendered = mytemplate.render(\n+                specieDescription=\'This is the description of the dbia\',\n+            )\n+            descriptionHtmlFile.write(htmlMakoRendered)\n+\n+    def __fillGroupsTxtFile__(self, groupsTxtFilePath, toolDirectory):\n+        # TODO: Reenable this function at some point\n+        mylookup = TemplateLookup(directories=[os.path.join(toolDirectory, \'templates/groupsTxt\')],\n+                                  output_encoding=\'utf-8\', encoding_errors=\'replace\')\n+        mytemplate = mylookup.get_template("layout.txt")\n+        with open(groupsTxtFilePath, \'w\') as groupsTxtFile:\n+            # Write the content of 
groups.txt\n+            # groupsTxtFile.write(\'name map\')\n+            htmlMakoRendered = mytemplate.render(\n+                mapName=\'map\',\n+                labelMapping=\'Mapping\',\n+                prioriy=\'2\',\n+                isClosed=\'0\'\n+            )\n+            # groupsTxtFile.write(htmlMakoRendered)\n'
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a TrackHub.pyc
b
Binary file TrackHub.pyc has changed
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a hubArchiveCreator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hubArchiveCreator.py Wed Jul 13 13:36:37 2016 -0400
[
@@ -0,0 +1,193 @@
+#!/usr/bin/python
+# -*- coding: utf8 -*-
+
+"""
+This Galaxy tool prepares your files so that they are ready for
+Assembly Hub visualization.
+Program test arguments (a -j/--data_json argument holding the JSON metadata of the inputs is also needed):
+hubArchiveCreator.py --gff3 test-data/augustusDbia3.gff3 -f test-data/dbia3.fa -d . -u ./tools -o output.html
+"""
+
+import argparse
+import collections
+import json
+import sys
+
+# Internal dependencies
+from TrackHub import TrackHub
+from Gff3 import Gff3
+from Bam import Bam
+from BedSimpleRepeats import BedSimpleRepeats
+from Bed import Bed
+from BigWig import BigWig
+from Gtf import Gtf
+
+
+# TODO: Verify each subprocessed dependency is accessible [gff3ToGenePred, genePredToBed, twoBitInfo, faToTwoBit, bedToBigBed, sort]
+
+
+def main(argv):
+    # Command Line parsing init
+    parser = argparse.ArgumentParser(description='Create a foo.txt inside the given folder.')
+
+    # Reference genome mandatory
+    parser.add_argument('-f', '--fasta', help='Fasta file of the reference genome')
+
+    # GFF3 Management
+    parser.add_argument('--gff3', action='append', help='GFF3 format')
+
+    # GTF Management
+    parser.add_argument('--gtf', action='append', help='GTF format')
+
+    # Bed4+12 (TrfBig)
+    parser.add_argument('--bedSimpleRepeats', action='append', help='Bed4+12 format, using simpleRepeats.as')
+
+    # Generic Bed (Blastx transformed to bed)
+    parser.add_argument('--bed', action='append', help='Bed generic format')
+
+    # BigWig Management
+    parser.add_argument('--bigwig', action='append', help='BigWig format')
+
+    # Bam Management
+    parser.add_argument('--bam', action='append', help='Bam format')
+
+    # TODO: Check if the running directory can have issues if we run the tool outside
+    parser.add_argument('-d', '--directory',
+                        help='Running tool directory, where to find the templates. Default is running directory')
+    parser.add_argument('-u', '--ucsc_tools_path',
+                        help='Directory where to find the executables needed to run this tool')
+    parser.add_argument('-e', '--extra_files_path',
+                        help='Name, in galaxy, of the output folder. Where you would want to build the Track Hub Archive')
+    parser.add_argument('-o', '--output', help='Name of the HTML summarizing the content of the Track Hub Archive')
+
+    parser.add_argument('-j', '--data_json', help='Json containing the metadata of the inputs')
+
+    ucsc_tools_path = ''
+
+    toolDirectory = '.'
+    extra_files_path = '.'
+
+    # Get the args passed in parameter
+    args = parser.parse_args()
+
+    input_fasta_file = args.fasta
+
+    # TODO: Add array for each input because we can add multiple -b for example + filter the data associated
+
+
+    array_inputs_gff3 = args.gff3
+    array_inputs_bed_simple_repeats = args.bedSimpleRepeats
+    array_inputs_bed_generic = args.bed
+    array_inputs_gtf = args.gtf
+    array_inputs_bam = args.bam
+    array_inputs_bigwig = args.bigwig
+
+    outputFile = args.output
+    json_inputs_data = args.data_json
+
+    inputs_data = json.loads(json_inputs_data)
+
+    # We remove the spaces in ["name"] of inputs_data
+    sanitize_name_inputs(inputs_data)
+
+    json_inputs_data = args.data_json
+
+    inputs_data = json.loads(json_inputs_data)
+    # We remove the spaces in ["name"] of inputs_data
+    sanitize_name_inputs(inputs_data)
+
+    if args.directory:
+        toolDirectory = args.directory
+    if args.extra_files_path:
+        extra_files_path = args.extra_files_path
+    if args.ucsc_tools_path:
+        ucsc_tools_path = args.ucsc_tools_path
+
+    # TODO: Check here all the binaries / tools we need. Exception is missing
+
+    # Create the Track Hub folder
+    trackHub = TrackHub(input_fasta_file, outputFile, extra_files_path, toolDirectory)
+
+    all_datatype_dictionary = {}
+
+    # Process Augustus
+    if array_inputs_gff3:
+        create_ordered_datatype_objects(Gff3, array_inputs_gff3, inputs_data, input_fasta_file,
+                                        extra_files_path, all_datatype_dictionary, toolDirectory)
+
+    # Process Bed simple repeats => From Tandem Repeats Finder / TrfBig
+    if array_inputs_bed_simple_repeats:
+        create_ordered_datatype_objects(BedSimpleRepeats, array_inputs_bed_simple_repeats, inputs_data, input_fasta_file,
+                                        extra_files_path, all_datatype_dictionary, toolDirectory)
+
+    # Process a Bed => tBlastN or TopHat
+    if array_inputs_bed_generic:
+        create_ordered_datatype_objects(Bed, array_inputs_bed_generic, inputs_data, input_fasta_file,
+                                        extra_files_path, all_datatype_dictionary, toolDirectory)
+
+    # Process a GTF => Tophat
+    if array_inputs_gtf:
+        create_ordered_datatype_objects(Gtf, array_inputs_gtf, inputs_data, input_fasta_file,
+                                        extra_files_path, all_datatype_dictionary, toolDirectory)
+
+    # Process a Bam => Tophat
+    if array_inputs_bam:
+        create_ordered_datatype_objects(Bam, array_inputs_bam, inputs_data, input_fasta_file,
+                                        extra_files_path, all_datatype_dictionary, toolDirectory)
+
+    # Process a BigWig => From Bam
+    if array_inputs_bigwig:
+        create_ordered_datatype_objects(BigWig, array_inputs_bigwig, inputs_data, input_fasta_file,
+                                        extra_files_path, all_datatype_dictionary, toolDirectory)
+
+    # Create Ordered Dictionary to add the tracks in the tool form order
+    all_datatype_ordered_dictionary = collections.OrderedDict(all_datatype_dictionary)
+
+    for index, datatypeObject in all_datatype_ordered_dictionary.iteritems():
+        trackHub.addTrack(datatypeObject.track.trackDb)
+
+    # We process all the modifications to create the zip file
+    trackHub.createZip()
+
+    # We terminate le process and so create a HTML file summarizing all the files
+    trackHub.terminate()
+
+    sys.exit(0)
+
+
+def sanitize_name_inputs(inputs_data):
+    """
+    Sometimes output from Galaxy, or even just file name from user have spaces
+    :param inputs_data: dict[string, dict[string, string]]
+    :return:
+    """
+    for key in inputs_data:
+        inputs_data[key]["name"] = inputs_data[key]["name"].replace(" ", "_")
+
+
+def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data, input_fasta_file,
+                                    extra_files_path, all_datatype_dictionary, tool_directory):
+    """
+    Function which executes the creation all the necessary files / folders for a special Datatype, for TrackHub
+    and update the dictionary of datatype
+    :param ExtensionClass: T <= Datatype
+    :param array_inputs: list[string]
+    :param inputs_data:
+    :param input_fasta_file: string
+    :param extra_files_path: string
+    :param tool_directory; string
+    """
+
+    datatype_dictionary = {}
+
+    # TODO: Optimize this double loop
+    for input_false_path in array_inputs:
+        for key, data_value in inputs_data.items():
+            if key == input_false_path:
+                extensionObject = ExtensionClass(input_false_path, data_value,
+                                                 input_fasta_file, extra_files_path, tool_directory)
+                datatype_dictionary.update({data_value["order_index"]: extensionObject})
+    all_datatype_dictionary.update(datatype_dictionary)
+
+if __name__ == "__main__":
+    main(sys.argv)
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a templates/genomesAssembly/layout.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/templates/genomesAssembly/layout.txt Wed Jul 13 13:36:37 2016 -0400
b
@@ -0,0 +1,11 @@
+## TODO: Add a loop to be able to put this template for each genome
+genome ${genomeName}
+trackDb ${trackDbPath}
+groups ${groupsPath}
+description ${genomeDescription}
+twoBitPath ${twoBitPath}
+organism ${organismName}
+defaultPos ${defaultPosition}
+orderKey ${orderKey}
+scientificName ${scientificName}
+htmlPath ${pathAssemblyHtmlDescription}
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a templates/groupsTxt/layout.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/templates/groupsTxt/layout.txt Wed Jul 13 13:36:37 2016 -0400
b
@@ -0,0 +1,4 @@
+name ${mapName}
+label ${labelMapping}
+priority ${prioriy}
+defaultIsClosed ${isClosed}
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a templates/hubDescription/layout.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/templates/hubDescription/layout.txt Wed Jul 13 13:36:37 2016 -0400
b
@@ -0,0 +1,6 @@
+<html>
+<body>
+  ${specie} genome with ${toolUsed}
+  <a href="${ncbiSpecieUrl}">NCBI genome/${genomeID} (${specieFullName})</a>
+</body>
+</html>
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a templates/hubTxt/layout.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/templates/hubTxt/layout.txt Wed Jul 13 13:36:37 2016 -0400
b
@@ -0,0 +1,6 @@
+hub ${hubName}
+shortLabel ${shortLabel}
+longLabel ${longLabel}
+genomesFile ${genomesFile}
+email ${email}
+descriptionUrl ${descriptionUrl}
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a templates/specieDescription/layout.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/templates/specieDescription/layout.txt Wed Jul 13 13:36:37 2016 -0400
b
@@ -0,0 +1,5 @@
+<html>
+<body>
+  ${specieDescription}
+</body>
+</html>
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a templates/trackDb/layout.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/templates/trackDb/layout.txt Wed Jul 13 13:36:37 2016 -0400
b
@@ -0,0 +1,12 @@
+% for trackDb in trackDbs:
+    ## See this http://genome.ucsc.edu/goldenPath/help/hgTrackHubHelp.html
+    track ${trackDb.trackName}
+    longLabel ${trackDb.longLabel}
+    shortLabel ${trackDb.shortLabel}
+    bigDataUrl ${trackDb.trackDataURL}
+    type ${trackDb.trackType}
+    visibility ${trackDb.visibility}
+    thickDrawItem ${trackDb.thickDrawItem}
+    priority ${trackDb.priority}
+
+% endfor
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/augustus/hubaInputs/GTF/augustusDbia3.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/augustus/hubaInputs/GTF/augustusDbia3.gtf Wed Jul 13 13:36:37 2016 -0400
[
b'@@ -0,0 +1,7076 @@\n+# This output was generated with AUGUSTUS (version 3.1.0).\n+# AUGUSTUS is a gene prediction tool written by Mario Stanke (mario.stanke@uni-greifswald.de),\n+# Oliver Keller, Stefanie K\xc3\xb6nig and Lizzy Gerischer.\n+# Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008),\n+# Using native and syntenically mapped cDNA alignments to improve de novo gene finding\n+# Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013\n+# No extrinsic information on sequences given.\n+# Initialising the parameters using config directory /home/galaxy/galaxy/dependency_dir/augustus/3.1/iuc/package_augustus_3_1/24009970003a/config/ ...\n+# fly version. Using default transition matrix.\n+# Looks like /home/galaxy/galaxy/database/files/000/dataset_2.dat is in fasta format.\n+# We have hints for 0 sequences and for 0 of the sequences in the input set.\n+#\n+# ----- prediction on sequence number 1 (length = 45179, name = contig1) -----\n+#\n+# Constraints/Hints:\n+# (none)\n+# Predicted genes for sequence number 1 on both strands\n+# start gene contig1.g1\n+contig1\tAUGUSTUS\tgene\t553\t34688\t0.03\t-\t.\tcontig1.g1\n+contig1\tAUGUSTUS\ttranscript\t553\t34688\t0.03\t-\t.\tcontig1.g1.t1\n+contig1\tAUGUSTUS\tCDS\t553\t578\t0.41\t-\t2\ttranscript_id "contig1.g1.t1"; gene_id "contig1.g1";\n+contig1\tAUGUSTUS\tCDS\t652\t1047\t0.25\t-\t2\ttranscript_id "contig1.g1.t1"; gene_id "contig1.g1";\n+contig1\tAUGUSTUS\tCDS\t12541\t12968\t0.87\t-\t1\ttranscript_id "contig1.g1.t1"; gene_id "contig1.g1";\n+contig1\tAUGUSTUS\tCDS\t32202\t33826\t0.89\t-\t0\ttranscript_id "contig1.g1.t1"; gene_id "contig1.g1";\n+contig1\tAUGUSTUS\tCDS\t33880\t34044\t0.98\t-\t0\ttranscript_id "contig1.g1.t1"; gene_id "contig1.g1";\n+contig1\tAUGUSTUS\tCDS\t34107\t34619\t0.99\t-\t0\ttranscript_id "contig1.g1.t1"; gene_id "contig1.g1";\n+contig1\tAUGUSTUS\tCDS\t34674\t34688\t0.45\t-\t0\ttranscript_id "contig1.g1.t1"; gene_id 
"contig1.g1";\n+contig1\tAUGUSTUS\tstart_codon\t34686\t34688\t.\t-\t0\ttranscript_id "contig1.g1.t1"; gene_id "contig1.g1";\n+# coding sequence = [atggcagcactctcgcaaaaggactttaatgacggaaaccagagaaatcgtcagaggaaaagtactgtcactgagcagc\n+# cctcatcaacatcaggaagcgtggcccaagtagaagcggacagcgcatcgtcacatttgtctgatcgctgttataacaacatagcaagtactaccaaa\n+# agtattgttggtgatgtgaaaacaagcagacgctgtgaagactttataagtagtggatcagcttcgactccattaaatgaatatgattgtgccaacgc\n+# tgatacgacggatattaaagatgaacctggagattttatagaaacaaattgtcattggcgaagttgttgtattgaatttaatacgcaagatgagcttg\n+# ttaaacatatcaataatgaccatatccaaaccaataagaaggcctttgtctgtcgatgggaaaattgtacccgtggtgaaaaacctttcaaagcgcag\n+# tatatgctagttgtgcatatgcgtcgtcataccggcgaaaagccacataaatgcacatttgaaggctgttttaaggcatattcacgcttggaaaacct\n+# aaaaacacatttacggtcacacacaggtgaaaaaccctatatgtgcgagtatccgggatgcagcaaggcgtttagtaatgcaagcgatcgtgcaaagc\n+# atcaaaatcgtactcacagtaatgagaaaccgtacatttgtaaagctcctggttgcacgaaacgttacaccgacccaagttctttgcgtaaacatgtt\n+# aaaacagttcatggtgctgagttttatgctaataaaaaacacaagggattgcccctaaatgatgcgaactctcgcctccaccgagacagtggccaagg\n+# tcggcataatcttcaagagcataatattgactctagcccttgcagcgaagaacttcaggtgggaaaacttataggcatgtctagtcccagtattaaat\n+# ctgaatctgatgaaagttcaccacatcatcagttgttaagtggagttcgagcttcagactgttttttaacatattcacaagatggtgccgccgaacat\n+# ataactttagatgacggctgggattgtgacgatgacgttgacgtggccgacttaccaattgtcttgcgtgctatggtaaatgttggcagcggaaattc\n+# gatggccccgaccattggagggggtgttgttgcgaggcagcgatttagaagtcgtttgcaaactaaaggaataaactctagtatggttatgcttggca\n+# acatccccgaaagcaatcgcaccattggaataagcgaacttaaccagcgtataacagaacttaagatggagcctggtaccgcttgtgatattacagtt\n+# ccattgaatacggggctggaaaggatttcagaagacttatcacaaaatcaatcaaatataacattaaataagcaaagcttctttaccgcttccggctc\n+# tctccaaggacattttcgtcgcgatagccagaactccactgcaagtacatattatggtagcatgcaaagtcgccgcagtagtcaatcatctcaggtgt\n+# cttctatatctacaatgcgtccaggcccatcgtacaacacaaccacagcttccctctatgatccaatttccccaggatgctctcgacgctctagccaa\n+# atgtctaatgtagtcaactcctacgcacttacatcaacatcaggattgactgcaattaacaaggacttaaatgcaaacagcagcccaaatgcttctat\n+# 
taataaaccgggtcttggtggtcagtactttggtttttacaataacagtcttcctccacctccatcgtctcatttaattgccaccaatttgaagcatc\n+# tgcaggacacagactctaggagttgttatcacaacacaactggcggtcgattttccattcccaattgtacgccatctctacatttagactacaat'..b'agt\n+# tttcgtgttggacgaatattgcgctcgctatggtgtgcggggatgctaccgacatttatgctacctttctgatttactggatcgtgcagaaaagcaac\n+# acatgatagatccaacactaattcactattcatttgcgttttgcgcaagccacgttcacggaaatcgacctgatggggtaggaagcattacgcatgag\n+# gaaaaggaaaaattttctgaaatcaaagaacgcctacgtcagttactggagtttcaaataaccaattttagatactgttttcctttcggtcgccccga\n+# aggcgctcttaaagcgacattatctttactagagagagtgctaatgaaagacattgttacccctgttccacctgaagaagttcgtcaaatgattaaaa\n+# aaagtttagagacggcagcgcttgtaaattacacccgtctctccaataaagctaagattgaaggcacttttccgttcaagggacacgtgccagggaaa\n+# tcctatcggaatttcaaaatttttccaagcactttcctgtacacgggaaacgtcccagggaaagcctatcggaaaggtcccagggaaatcccgtcgga\n+# tattcaggatttgcgaggagaggttattgttccgcccccaaaaaaactagaggacctaattcacttagcagaactttgtgttgatctgttgcaacaaa\n+# atgaagagcactatggagaactgcgcaaacatgacaaaatggataaaattaaaatgcgtaaggaagatgatgatgtaccaaaaggccacaatgaaagc\n+# gatattgatttaaccgccaatactggactcagtagcacatcagacctggcttctgcagcatcaactaatggatcgtcatttcgttattataatttgag\n+# gaatgggcgttttcatcagcacctgcgagacacattcgcaccattagtcgtgcggtacgtggatctgatggaatcttcaatagctcagtcgattcata\n+# agggatttgaaaaagaacgctgggaaagtaaagggaacggatgtgccacctctgaagacttattttggaaactagatgctctacagtcgtttataaga\n+# gacctgcactggccagacgcagagtttcgacaacatttagaacagcgtcttaaaatgatggccgtcgatatgatagagcaatgtatacaacgaactga\n+# ttcgtcttttcagtcgtggctaaaaaaaaacattgccttcatatcaactgattatattttaccttcagaaatgtgcgctatggtcaatgtgatattag\n+# atgctaaaaatcaaagctttaaattgactactattgacggcattgatttgtataaatttcatgcaaaaattgacgaccaaatcgacaaagcgaatgta\n+# gctatgacacaaggtctaactggtaaacttatgtcagtgctagagtcgactttgtcaaaattagcacgatacgacgaaggtagcctaatcggctcgat\n+# tcttagttttacaaatgtatcgagctcgggaaaggatctcgggcaaggatatgtaaatttctttagaaataatatggatcaagtacgaggaaaaattg\n+# gcgacgatttatggaccctgaatttctttgagcagtggtactcgcagcagattaacatgctatgtaattggctttcggaacgtttggaccacgctctg\n+# 
cactacgctcaagtttcatctatttctcacattatcaagaaaatatattcagacttcgaattacaaggtgtattagaagataaattaaactctaaagc\n+# atatcaagcagtcgcacagcgaatggcgacagaggaagcgacatgtgctttgacaatgcctgatgttagcgaagatgaaccctgtgacgacattcgag\n+# aaggggaagaagaagatactggcgacgaatctacctctaacataccaaggggcttaccaaaaccaaaaattgctgccgctcaagctgctgctgttacc\n+# aacgttgttgccggccgtgtgggtaatttactcggcaaaggcattggcggccttagttcaaagttgggaagtggaagttggttttaa]\n+# protein sequence = [MIDPSSSEEEGEDDPIANVSSKGRLTHAPKGTNTVSILGGVSGPGVGSNMAISGSNGDLAGNQRQSNISSISNRNDAG\n+# NVAGVGGSSNKNEQIHGSRVDGGNLEVPNSCIPSGVSQETLNQSIGSSRANSLPRPLSPSPSLTSEKPDTGDPHAFLKGETQIMADEAFQNAVQSYHD\n+# VFLKSERVLKMVQSGASSQHDFREVFRNNIEKRVRSLPEIDGLSKETVLTSWMAKFDIILKGTGEEDSKRPSRMQQSLNSELILSKEQLYDMFQQILL\n+# VKKFEHQILYNALMLDSADEQAAAIRRELDGRMQRVGEMEKNRKLMPKFVLKEMESLYVEELKSSINLLMANLESLPVSKGNMDSKYGLQKLKRYNHR\n+# KLILRSHGSLSKLEGDSEDGSTQLTKLDVVLTFQLEVIVMEVKGLKSLAPNRIVYCTMEVENGEKLQTDQAEASKPMWDTQGDFTTTHPLPVVKVKLY\n+# TENPGMLALEDKELGKVILKPTPLSSKSPEWHRMVIPKNLPDQDIRIKIACRLDKPLNMKHCGYKEKKSEPSEMMQLDGYTVDYIEAASANLMFGIDL\n+# NGGRFFFNAVREGDSISFACDDENECSLWVMAMYRATGQSHKPTPPITQDKNSAMSKIQGARVAPDGSIFLWASFFVAAATRNSSSYHWFRVGGRLNP\n+# MVVSAAARMEWDMCAQSPLASTWRGIGWWSSSERGQPYDLVDVEVPKSGATHYHVSCRGEVNQPETVVRDGLMEAESTDGGVKDAFATELCVEVPEDK\n+# LHVVVWGWFSPGQVFVLDEYCARYGVRGCYRHLCYLSDLLDRAEKQHMIDPTLIHYSFAFCASHVHGNRPDGVGSITHEEKEKFSEIKERLRQLLEFQ\n+# ITNFRYCFPFGRPEGALKATLSLLERVLMKDIVTPVPPEEVRQMIKKSLETAALVNYTRLSNKAKIEGTFPFKGHVPGKSYRNFKIFPSTFLYTGNVP\n+# GKAYRKGPREIPSDIQDLRGEVIVPPPKKLEDLIHLAELCVDLLQQNEEHYGELRKHDKMDKIKMRKEDDDVPKGHNESDIDLTANTGLSSTSDLASA\n+# ASTNGSSFRYYNLRNGRFHQHLRDTFAPLVVRYVDLMESSIAQSIHKGFEKERWESKGNGCATSEDLFWKLDALQSFIRDLHWPDAEFRQHLEQRLKM\n+# MAVDMIEQCIQRTDSSFQSWLKKNIAFISTDYILPSEMCAMVNVILDAKNQSFKLTTIDGIDLYKFHAKIDDQIDKANVAMTQGLTGKLMSVLESTLS\n+# KLARYDEGSLIGSILSFTNVSSSGKDLGQGYVNFFRNNMDQVRGKIGDDLWTLNFFEQWYSQQINMLCNWLSERLDHALHYAQVSSISHIIKKIYSDF\n+# ELQGVLEDKLNSKAYQAVAQRMATEEATCALTMPDVSEDEPCDDIREGEEEDTGDESTSNIPRGLPKPKIAAAQAAAVTNVVAGRVGNLLGKGIGGLS\n+# SKLGSGSWF]\n+# end gene contig70.g117\n+###\n+# 
command line:\n+# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=on --codingseq=on --introns=off --stop=off --stop=off --cds=on --singlestrand=false /home/galaxy/galaxy/database/files/000/dataset_2.dat --UTR=off --genemodel=complete --species=fly\n'
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/augustus/hubaInputs/GTF/dbia3.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/augustus/hubaInputs/GTF/dbia3.fa Wed Jul 13 13:36:37 2016 -0400
b
b'@@ -0,0 +1,66784 @@\n+>contig1\n+AAAACTAATTTTATCAAAATCGGACAACTATATCATATAGCTGCCATACG\n+AACGATCGGAAAATTGGTAAGTAAATAATTAAAAATATTATATCTTTGGT\n+GTGTTTTTGGAAATAACATTTTTTAATATGTTTTGAATTTTGAATTAAAT\n+TCTATCAAAATCGGACGACTATATCATATAGCTGCCATACAAACGATCGG\n+AAAATTGGTGAATAAATAATATGAAAAAATTATATCTTTGGTTATTTTTT\n+AGACAAATAACCTCCAACGCTTGGAAATAACATTTTTTAATTAGTTATAA\n+ATTTGATATTTTAATTTTATCAAAATCGGACGAATATAGCATATAGCTGT\n+TAAAATAATATGAAACAAATTATAGCTCCGGTGTTTTTTACATATTATCT\n+TATACTATTGGGAAAATAGTTTATTATATTTTAAAGAATTTCCAATTAAA\n+CTCTAACATATAGCTTTCAAAGAAACGGTCAAAAAAGTAAAGAAATCATT\n+TTTTTTTAACATCACTGAAGCTAGAAACAATCCTTAAAAATGTAACATGG\n+TGTTAGTAGCATTGAAAATTGCTTATAACTGCAAAGGGAAAACAAACATC\n+GGCTTGCCGAATGTAATTTCCATTCTTGTTTGACTTGAGTTTATAACTTA\n+CAATTATGGTAAGGTGCCTGATTTTGGTTTTTGCCATACGGTATGTGGTA\n+GTTATCATTTTGCCGTGCTATAAGTGACATCACAGCCGATTCTTTTACTC\n+CATAATGGGCCAATGTGTTGAGGCGTTTCCATCCATTTATTGTTTTTGTA\n+GTTAGGTCTTCATCTTGTAATGTGAGATGACCTCCTCTTCCATGCCTCCA\n+TTCTAAGTCAACTTCGTGCACAGATGGTCTCATAGAAAACGGAGTATTTT\n+TAAAAATTGCGTCCAAAATTTTTAATTTAACTTGCGATATGGTATCCAAA\n+TCGTTTACACGACATTGGACTTTTTCATCAAGATCATCTTGTAAAATGTG\n+CAAAATTACAACAGAATGGGTAACTTGTTCGTGGAGAAGTCGTTCCTCTG\n+AAAGTGAATAACGAGCATCGTGGGTTATTGCGTCCACCAAACCCTTTTCA\n+ATTTGATGCTTAATTGCCTTGAACAGCAAAAATAAATTAGATCCAGCATA\n+TTCTTTTAGGTAGTCGTACATACAAATTGCTAAGTAGTTTGTTAACATTT\n+TTTCAACTACGCTCTCAGTGCGTCGTAGCATTAGCTGAGGATGCTTGCTG\n+GCGAGCGATTTGTCAATTAATCGCAATAAAAGGGACTTTAAAATTTCCGT\n+CGCATATTCCATTTTGTTCATTAGAACAACCATAAGTAAAGAGGCGACGT\n+TAACTCGATCGCGAATTGAGAAAGATGACCGTTGAGCTTCTAAAGTTTCT\n+ATGAACAATAGTAAAAAATATTTGTTTCCAATAAGTTGCTCGAATTGTAT\n+CATAGCTGCATCATAGTTAGTGTGCGGGCTACTTCCACAAAATTTTCGGG\n+AGTTTAGAATAGGATGATCTGATACACCGGGAAAGAAAACTTTCATAATG\n+TAATTGACGTGATCTAACGTTGGTATACCGGTGCTCTCCAAATCTGCTGT\n+TAGATCGGTCATGTCCGTTTGGAGCTCAGCAAATGCCTGTTTACACTCAG\n+AACGAACGTTGCTTTCCAATGTTATCATCTGTATCTGAATTCGTTTGTAT\n+TCCCGTTCTGCTTGGGTCGATTTCCTTCTAAATATTATTAGTACGACAAC\n+CAAAACGATGACAAGCACTGCAACTGTCAATATAACGACAAACATGGCAT\n+GTGAAAAAACATAGGGTTTATTTAAATCATATTTCAAATATCCTATGGCG\n+AAAC
GAAGATTTCGCCCTACTTTAACCACAACTAGAGGTAAATCTGTCGA\n+TTGATCCACACCATTTTCATCAGTTGGGAGTGGTTGATGTTCCGGTGGAA\n+TGCACAAAAGTTGAGTTAGTGTAAGGCTTGTTATATTGCATTGAGTAGTA\n+CCAATGGTTACATTAACGTCGTATTCATCAGCTGCCAAATTTAGTAGCTC\n+GCCTTCAATAACCAAGCTGTCACCCTTGTATAGTTTAATTCCGTCATTTG\n+GAAATGGTAAATATTTTGGATCGGCTAAATAAACAATAGTGCTCCTGATA\n+TCATGAAAGTATTTATTTAGATCTCGAACAAGCTGTACATTGTCCATTAC\n+AAAACTAAGTTGTAAATTCAGTTGCGTTTCATGGACCTTTACAAAAGTTG\n+TAACATCCATATTATTATTGACGAAATAGCTAGGCGCAGTTCCCCCTGTT\n+GTGTATATATGAAAGTTGTCTACGAAGTCTGCTTTTCGCTTTCGTCTAGA\n+ATATTCGTTCTGATGCGCACTTTCATTTTTAAAGTCAAATTCCGAATCCA\n+TCACTCTGTTAGAACTTTTAAATGCTTGAAATTTATCATTTACCGGAGGA\n+GATGGGCATTCCATTTGATTTGAATTGATTACTACGCAAGATGTTTTGTT\n+TACTCTTTCATTATCAAAAAAAACTTCAAGTTCAGGCTTTTGAATTGAAT\n+TAAGATACATTCCATGAACGGTCAGAACGCGACCACCACTTACAAAACTG\n+CGTAAAGGCTTAATCTGCATTATACGCGGGTCTTGGGTATAGTTAAAAAT\n+AGAACAGGGCTGCCTTGGTAACGTACGAAATTGGTATGAACCGAAATTAC\n+TTCGAGTTAGATTTTTATTGGGTATAGTTGCGGGTGAGATATGACATTCT\n+AAAGTTCGGTTGGCACCGTCAATAAGAAGGTGTAGAGACCGTATTGGTTC\n+TGGCTGGGTCGCCTCTGATGTAGTACTACTAACTTGAGTTGAAGAAGCTT\n+GTGTAACGTTTATATGACACTCGTACTCATCTAAATATGCTCGCATGGTT\n+GATCCAATGTTCAAAAACTTTCCTATTAACGACAATTGTGTTCCTCCCGA\n+CCTGGGCCCAATCGTGGGATATAATCCTGTTAGCAAAACATTCTTAAAGT\n+GAAATTGTACACTAGATTCAGTATACCCTGCATCATTTGCAACCTTTATA\n+GGCGCTGACATTTCATACATCACTGCTCCAGTGCGGCATTCGATCTTTAC\n+AGAAATTTGGTAGTTGACTAGTTCACAAGGCACAGACCCAATAAATATTT\n+TTCCACGCACATCTCCTTCGCGAATGCCCAAATTACTGCCTTCAATTGTA\n+ATAAGGGTACCACCCTCAATAGGTCCAGATAACGGTTTAATAATATCGAT\n+CCTTGGTAAAGGGCACTCGTTTTCTTTAGTTGTTTTTGATCCTGCACTAA\n+TAGCACGATCACTGTCTGCTATGCAAGTCTCGTTATATACACATGAGTTG\n+CTGCACCAAGCACATTGGTATTTTGGATCACGAGTGACACACAAACTGCA\n+ATCAGCATGATCCCGATGTGAGCCCAGTACGTCGCATTTATATAATGTAA\n+CAATCGCCGTGTCCACATAGTGCTGGAAATTCCATGTAATTACAACCTTT\n+GCTTGGTATTCATGTGTATTAGTCTCGTAGAAATAAGGTGTTTTTTCACA\n+AACAACAATCTTGTTTGACTCAATGTGGGCAGGCAATAGCATCTGAGCAG\n+CTTCAATATGAACTGTACACAAGAATCCAGCGTGAGCGCTTTTGGGTTTT\n+GGTAAGTTTTCTATCTCTAAACGAATTTCTTTTGGCACCCGCACCGGTAA\n+AAGAATCGCCGGACGATTGCTTTTTAAATGGGGGCATTGACCA
ACACTAG\n+TTACCGAATTTTCTATATTACGACATTGTATTGATTGATGGACACATTTA\n+TTGTCAAAAATACACC'..b'CCACTGGACCCGTTGGATAGCGTC\n+GGCACTTTTCTGTTTGGTGCCATTCACTGGTCACTAGTAGGACCGCTCTG\n+CCTATTAAAATAGGAGTGCTGCTTAGAATTTATTTCAGCAAGACAGCCGT\n+AAGATATATATGAATAGTATTTAACGGCAGTGGCAGAGGCCTTTACAATA\n+TAAGGCTTTACGGGATTTTAAACAATGAAAAACAAAAATATTTTTATAGA\n+GTCGAGATAAAATACTTTTTACAATAAATACTAAGTATTTTTAAAAGATA\n+AAAATATTTAAAAGTAAATACTGAATATTTTAACAGGTGTTAAGTACTGG\n+GAACTGTGACTAAGGACGGCGGTATATCGCTACCATAATATTTTTAAAGG\n+AACTGAATACTGAAAGATGTGACCAAAATGGTAGAATATCGCTAACGAAA\n+ATCACAGACGCAAATATCGATAGTGGCCTAGAGACCCGATATCGCCGCCG\n+TGATTATCGATGAAACTACAGCTGAGCTGCTTGTTTACATTCTAAAGTTA\n+AAATTTTTAAAAATTTCCAATACAAAAAAAAATTCTTCGTATTAACACGG\n+CTGTTTGCCGGTAAGATCTGAAGTGGATTCAAGTTGCAGCACCCTCAGGA\n+AAAGAATAAATTATTATAATATAATAATATAATATAAAGAATTTAAATTC\n+GAACCAAACGCGGTATGTCACTCTTCATTTAGAGAATTTGTGGCTTTGTT\n+TTGTACAAAAAACTATTCTTTTTTCTTGAAAATTAGCATCAAGAATTTAT\n+TGCCAAAACAGAACGCAATTCTGCTAAATCTGGTTACCCTGCCAAATGGC\n+TGGCCGTTCTGTGCATTCAATAACACAAGCTAACGGCTTACGGCACATTC\n+GGAAAGATCCAAAAGAATTTCTATTCCGGCATTTTTTATGTATTATCGTC\n+CAATTTTTGTATGTGTATTCATTCTTTTGGCTGTCCGTTATTAAGCTTTT\n+CTTATCCACATAAGCTGACCAGCTGCCAAAAAGCCGACCGTTTTGTGCAG\n+CCAATAACCCAAGCTAACTATCAACAGCAATTTCGAAAAATCCAGAAGAA\n+GCTATCCTTCGGCAATTTTTTCTTATATTATAATTTTTTTGTCACAATTT\n+TTTGTCAAAAAATCGACAATAATTAAGTGTTTACATTTTAATGCGATTTA\n+ATTGGAAATTTGAATTCGAATCAAACGCGGTGTCACTCTACATATGGACA\n+ATTTGTGGCTACAAAATTAGTTGAAAGTGCATGTTTTTTTTTAAGTGAAA\n+TAAAACAAAAACAGCAATAACAGGCCAAAATGTTGTTTTATAAATAAATA\n+CTTACATATCCGCATATAAATATGAATGAAAAAAAAAACAGCAAGACTGG\n+GCCAAAATATTGTTCTTTAATATGTAAATAAATACATATCTACATATACA\n+TATGTGGACAAAAGTGTTAAAATTAGTTAATAGTGCATGTTTTTTAAATG\n+AAAAAATAAAACAAAAAAAGCAAGCAAATATAAATTATATTCCACACACA\n+TTTACAATAATTTCAAACTACGCTAAAAATACCCAACAGAAAAAACATAG\n+AAAGGTAAACAAAATACAAGAATATCAAAAAAAATCTAATTTTCACGTTG\n+TTGTGGCCGGTACCACCGGTCATACATACATATAGATACATATTTAGATT\n+CTCACAAAAAGAATAGAAAATTACCGCTAATTTCCTTATATACGTTTTTG\n+TTTTCAGTGTTTTGAAATAGTATTTTTCAAACAAAAAACAATACAAAAGG\n+GTTAAAAAAATTGTTCGATTGGCTTTAACATACT
AATTAAAAAAAATCAC\n+AATTTTTTGGCAAAATATCGAAAAAAATTAAGTGTTTACATTTTTATGCG\n+AATTATTTGGAAATTTAAATAAGAACCCAACGCGGTTTGTCACTCTACAT\n+ATGGACAATTTGTGGCTTTGTTTTGAACAAAAAATGAATTTTTTTTTACT\n+CAAAATTAACAAGAAGAAACAGTAATGCGGGTTTTGATCGCAATTCTGCT\n+AAATCGAATTTCCGTGCCCAAAGGCTGACCTTTTTGTGCAGCAAATAGCC\n+CAAGCTAATAATCTACAGCAATTTCTTAAAGACCGAAAATGTATCAACTT\n+CGGCAAAATATGAAAATTTAACATTTATTTTTCAACATTTTTTGCCAAAT\n+AATCGACAAAAATTTCAGTGTCCCGATTTGGATGCGAATTAATTGAGGAC\n+AATTGTGACTTTGTTTTGTAAAAAAACTATTTTTTTTACTGAAAATTACA\n+ATCAAGAATCTAAGGCCAAATCAGAATATTTTCACTGCGGTTTTTTGATC\n+GCAATTCTGCTAAATCGGGTTCCGTGCCAAAAAGCTGACTATTTTTTTTA\n+CTGAGAATTAGCATCAAGAATTTATTGCCAAAACAGAACGCAATTCTGCT\n+AAATCGGGTTGCCCTGCCAAAAGGCTGGCCGTTTTGCGCATTCAATAACA\n+CAAGCTAACGGTTTACAGCACATTCGGAAAGATCCAAAAGAATTTCTATT\n+TCGGCATTTTTTATGTATTATCGTCTAATTTTGGTATGTGTATTCATTCT\n+TTTGGCTGTCCGTTATTAAGCTGCTTTTCTTATTCACATATTCAATGGCG\n+CAGTACATAAAAAGAATGAACCGTGGTTTTCTTGCCCTCTCCATCTCCCT\n+CTACCACTTCCCCTCTCAACAACTATCTCACGCACTCTGTCCCTTCTCGT\n+CTCTGTCACCCTCTCTTTACCTACCTCACTCTGGCACACTCTCTCTACCT\n+ATCTCACTCTGTCACCTTCTTCTACCTAATTCTCTCTGTCGCGCTCGCTC\n+TCTCAACATGCCTTTCGCACCGTATCTCTTCGTTTCTCTGTATGCTTGCT\n+CTCTTAATTTAAGTTTCTCTTTCTTTGTGTTCAATATCTTTCGCGCTCGT\n+GACTCCTATTGACCGGACTCTGAGCGGTGCTCATTAAGAGTGGAGTTTTC\n+GATGATGGCGGGTGGCGCGAACGAGAATTTTTTACATAAAGGTAAGTTGC\n+CAATTCTTTTTTAAATGTGACATACAGCATGCATATTTTATTTATTGTAA\n+TTATATGTGAAAGAATAAAATTATATGTACTTTTATCTAATCTATTGCAT\n+CTATTTTTTCACAGGTAAACGAGTCACACGAGAAGTAAAACACAAACACA\n+AGGATCAAAATAAAATTCAGGTGAGTGAACGAACGTGTTGAAAGTAGTTA\n+ATAGTGCATGTCTATTTAAGTGTAAGAAACAAAAAAGACACAAATGGGCC\n+AATAATTCGTTTCTTTAATAGATAACTGCCTATATTATGTTCAAACTATG\n+CTAAAAAGACCCAAAAGGAAAACATAAAGTATATACCTTGCAAAATGAAT\n+AAAAAGAAATCATATATATTCATATACGTTTGACCGGTACCACCGGTCAT\n+ACATACATAAACATAAACACAAAAAGACTAGAATATAAATACCATATTTT\n+TGATTGGCTTAAAAATGGCACAAAAAAAAGATAAAAACTAGAGTGTCTAG\n+CTTTAAGTGACAATCGACTCGGAAAATGATCAAGAAATAAATGAAAAATT\n+TGTCTTGCGGCTTTTTGGTCGCAATTCTAAATCGGTTTGTATGTATGTCA\n+CTCTTCATGTATACAATTTGGTGGCTTTGTATTCCACAAAAGTCACAGTA\n+GTCACAAAAGTTTTTTTTAC
TGAAAATTACCATCAAGAGTCTATGGCCAA\n+ATCAGAATATTTTCACTGCGGTTTTTTATTTCAATTCTGCTAAATCGGGT\n+TTCCGTGCCAAAAAGCTTTTGCAGCCAACAACAATATTCAATTTTTTGGC\n+AAAAAATCGACAAAAAAATT\n'
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/augustus/output/augustusDbia3.bb
b
Binary file test-data/augustus/output/augustusDbia3.bb has changed
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/augustus/workflowInputs/dbia3.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/augustus/workflowInputs/dbia3.fa Wed Jul 13 13:36:37 2016 -0400
b
b'@@ -0,0 +1,66784 @@\n+>contig1\n+AAAACTAATTTTATCAAAATCGGACAACTATATCATATAGCTGCCATACG\n+AACGATCGGAAAATTGGTAAGTAAATAATTAAAAATATTATATCTTTGGT\n+GTGTTTTTGGAAATAACATTTTTTAATATGTTTTGAATTTTGAATTAAAT\n+TCTATCAAAATCGGACGACTATATCATATAGCTGCCATACAAACGATCGG\n+AAAATTGGTGAATAAATAATATGAAAAAATTATATCTTTGGTTATTTTTT\n+AGACAAATAACCTCCAACGCTTGGAAATAACATTTTTTAATTAGTTATAA\n+ATTTGATATTTTAATTTTATCAAAATCGGACGAATATAGCATATAGCTGT\n+TAAAATAATATGAAACAAATTATAGCTCCGGTGTTTTTTACATATTATCT\n+TATACTATTGGGAAAATAGTTTATTATATTTTAAAGAATTTCCAATTAAA\n+CTCTAACATATAGCTTTCAAAGAAACGGTCAAAAAAGTAAAGAAATCATT\n+TTTTTTTAACATCACTGAAGCTAGAAACAATCCTTAAAAATGTAACATGG\n+TGTTAGTAGCATTGAAAATTGCTTATAACTGCAAAGGGAAAACAAACATC\n+GGCTTGCCGAATGTAATTTCCATTCTTGTTTGACTTGAGTTTATAACTTA\n+CAATTATGGTAAGGTGCCTGATTTTGGTTTTTGCCATACGGTATGTGGTA\n+GTTATCATTTTGCCGTGCTATAAGTGACATCACAGCCGATTCTTTTACTC\n+CATAATGGGCCAATGTGTTGAGGCGTTTCCATCCATTTATTGTTTTTGTA\n+GTTAGGTCTTCATCTTGTAATGTGAGATGACCTCCTCTTCCATGCCTCCA\n+TTCTAAGTCAACTTCGTGCACAGATGGTCTCATAGAAAACGGAGTATTTT\n+TAAAAATTGCGTCCAAAATTTTTAATTTAACTTGCGATATGGTATCCAAA\n+TCGTTTACACGACATTGGACTTTTTCATCAAGATCATCTTGTAAAATGTG\n+CAAAATTACAACAGAATGGGTAACTTGTTCGTGGAGAAGTCGTTCCTCTG\n+AAAGTGAATAACGAGCATCGTGGGTTATTGCGTCCACCAAACCCTTTTCA\n+ATTTGATGCTTAATTGCCTTGAACAGCAAAAATAAATTAGATCCAGCATA\n+TTCTTTTAGGTAGTCGTACATACAAATTGCTAAGTAGTTTGTTAACATTT\n+TTTCAACTACGCTCTCAGTGCGTCGTAGCATTAGCTGAGGATGCTTGCTG\n+GCGAGCGATTTGTCAATTAATCGCAATAAAAGGGACTTTAAAATTTCCGT\n+CGCATATTCCATTTTGTTCATTAGAACAACCATAAGTAAAGAGGCGACGT\n+TAACTCGATCGCGAATTGAGAAAGATGACCGTTGAGCTTCTAAAGTTTCT\n+ATGAACAATAGTAAAAAATATTTGTTTCCAATAAGTTGCTCGAATTGTAT\n+CATAGCTGCATCATAGTTAGTGTGCGGGCTACTTCCACAAAATTTTCGGG\n+AGTTTAGAATAGGATGATCTGATACACCGGGAAAGAAAACTTTCATAATG\n+TAATTGACGTGATCTAACGTTGGTATACCGGTGCTCTCCAAATCTGCTGT\n+TAGATCGGTCATGTCCGTTTGGAGCTCAGCAAATGCCTGTTTACACTCAG\n+AACGAACGTTGCTTTCCAATGTTATCATCTGTATCTGAATTCGTTTGTAT\n+TCCCGTTCTGCTTGGGTCGATTTCCTTCTAAATATTATTAGTACGACAAC\n+CAAAACGATGACAAGCACTGCAACTGTCAATATAACGACAAACATGGCAT\n+GTGAAAAAACATAGGGTTTATTTAAATCATATTTCAAATATCCTATGGCG\n+AAAC
GAAGATTTCGCCCTACTTTAACCACAACTAGAGGTAAATCTGTCGA\n+TTGATCCACACCATTTTCATCAGTTGGGAGTGGTTGATGTTCCGGTGGAA\n+TGCACAAAAGTTGAGTTAGTGTAAGGCTTGTTATATTGCATTGAGTAGTA\n+CCAATGGTTACATTAACGTCGTATTCATCAGCTGCCAAATTTAGTAGCTC\n+GCCTTCAATAACCAAGCTGTCACCCTTGTATAGTTTAATTCCGTCATTTG\n+GAAATGGTAAATATTTTGGATCGGCTAAATAAACAATAGTGCTCCTGATA\n+TCATGAAAGTATTTATTTAGATCTCGAACAAGCTGTACATTGTCCATTAC\n+AAAACTAAGTTGTAAATTCAGTTGCGTTTCATGGACCTTTACAAAAGTTG\n+TAACATCCATATTATTATTGACGAAATAGCTAGGCGCAGTTCCCCCTGTT\n+GTGTATATATGAAAGTTGTCTACGAAGTCTGCTTTTCGCTTTCGTCTAGA\n+ATATTCGTTCTGATGCGCACTTTCATTTTTAAAGTCAAATTCCGAATCCA\n+TCACTCTGTTAGAACTTTTAAATGCTTGAAATTTATCATTTACCGGAGGA\n+GATGGGCATTCCATTTGATTTGAATTGATTACTACGCAAGATGTTTTGTT\n+TACTCTTTCATTATCAAAAAAAACTTCAAGTTCAGGCTTTTGAATTGAAT\n+TAAGATACATTCCATGAACGGTCAGAACGCGACCACCACTTACAAAACTG\n+CGTAAAGGCTTAATCTGCATTATACGCGGGTCTTGGGTATAGTTAAAAAT\n+AGAACAGGGCTGCCTTGGTAACGTACGAAATTGGTATGAACCGAAATTAC\n+TTCGAGTTAGATTTTTATTGGGTATAGTTGCGGGTGAGATATGACATTCT\n+AAAGTTCGGTTGGCACCGTCAATAAGAAGGTGTAGAGACCGTATTGGTTC\n+TGGCTGGGTCGCCTCTGATGTAGTACTACTAACTTGAGTTGAAGAAGCTT\n+GTGTAACGTTTATATGACACTCGTACTCATCTAAATATGCTCGCATGGTT\n+GATCCAATGTTCAAAAACTTTCCTATTAACGACAATTGTGTTCCTCCCGA\n+CCTGGGCCCAATCGTGGGATATAATCCTGTTAGCAAAACATTCTTAAAGT\n+GAAATTGTACACTAGATTCAGTATACCCTGCATCATTTGCAACCTTTATA\n+GGCGCTGACATTTCATACATCACTGCTCCAGTGCGGCATTCGATCTTTAC\n+AGAAATTTGGTAGTTGACTAGTTCACAAGGCACAGACCCAATAAATATTT\n+TTCCACGCACATCTCCTTCGCGAATGCCCAAATTACTGCCTTCAATTGTA\n+ATAAGGGTACCACCCTCAATAGGTCCAGATAACGGTTTAATAATATCGAT\n+CCTTGGTAAAGGGCACTCGTTTTCTTTAGTTGTTTTTGATCCTGCACTAA\n+TAGCACGATCACTGTCTGCTATGCAAGTCTCGTTATATACACATGAGTTG\n+CTGCACCAAGCACATTGGTATTTTGGATCACGAGTGACACACAAACTGCA\n+ATCAGCATGATCCCGATGTGAGCCCAGTACGTCGCATTTATATAATGTAA\n+CAATCGCCGTGTCCACATAGTGCTGGAAATTCCATGTAATTACAACCTTT\n+GCTTGGTATTCATGTGTATTAGTCTCGTAGAAATAAGGTGTTTTTTCACA\n+AACAACAATCTTGTTTGACTCAATGTGGGCAGGCAATAGCATCTGAGCAG\n+CTTCAATATGAACTGTACACAAGAATCCAGCGTGAGCGCTTTTGGGTTTT\n+GGTAAGTTTTCTATCTCTAAACGAATTTCTTTTGGCACCCGCACCGGTAA\n+AAGAATCGCCGGACGATTGCTTTTTAAATGGGGGCATTGACCA
ACACTAG\n+TTACCGAATTTTCTATATTACGACATTGTATTGATTGATGGACACATTTA\n+TTGTCAAAAATACACC'..b'CCACTGGACCCGTTGGATAGCGTC\n+GGCACTTTTCTGTTTGGTGCCATTCACTGGTCACTAGTAGGACCGCTCTG\n+CCTATTAAAATAGGAGTGCTGCTTAGAATTTATTTCAGCAAGACAGCCGT\n+AAGATATATATGAATAGTATTTAACGGCAGTGGCAGAGGCCTTTACAATA\n+TAAGGCTTTACGGGATTTTAAACAATGAAAAACAAAAATATTTTTATAGA\n+GTCGAGATAAAATACTTTTTACAATAAATACTAAGTATTTTTAAAAGATA\n+AAAATATTTAAAAGTAAATACTGAATATTTTAACAGGTGTTAAGTACTGG\n+GAACTGTGACTAAGGACGGCGGTATATCGCTACCATAATATTTTTAAAGG\n+AACTGAATACTGAAAGATGTGACCAAAATGGTAGAATATCGCTAACGAAA\n+ATCACAGACGCAAATATCGATAGTGGCCTAGAGACCCGATATCGCCGCCG\n+TGATTATCGATGAAACTACAGCTGAGCTGCTTGTTTACATTCTAAAGTTA\n+AAATTTTTAAAAATTTCCAATACAAAAAAAAATTCTTCGTATTAACACGG\n+CTGTTTGCCGGTAAGATCTGAAGTGGATTCAAGTTGCAGCACCCTCAGGA\n+AAAGAATAAATTATTATAATATAATAATATAATATAAAGAATTTAAATTC\n+GAACCAAACGCGGTATGTCACTCTTCATTTAGAGAATTTGTGGCTTTGTT\n+TTGTACAAAAAACTATTCTTTTTTCTTGAAAATTAGCATCAAGAATTTAT\n+TGCCAAAACAGAACGCAATTCTGCTAAATCTGGTTACCCTGCCAAATGGC\n+TGGCCGTTCTGTGCATTCAATAACACAAGCTAACGGCTTACGGCACATTC\n+GGAAAGATCCAAAAGAATTTCTATTCCGGCATTTTTTATGTATTATCGTC\n+CAATTTTTGTATGTGTATTCATTCTTTTGGCTGTCCGTTATTAAGCTTTT\n+CTTATCCACATAAGCTGACCAGCTGCCAAAAAGCCGACCGTTTTGTGCAG\n+CCAATAACCCAAGCTAACTATCAACAGCAATTTCGAAAAATCCAGAAGAA\n+GCTATCCTTCGGCAATTTTTTCTTATATTATAATTTTTTTGTCACAATTT\n+TTTGTCAAAAAATCGACAATAATTAAGTGTTTACATTTTAATGCGATTTA\n+ATTGGAAATTTGAATTCGAATCAAACGCGGTGTCACTCTACATATGGACA\n+ATTTGTGGCTACAAAATTAGTTGAAAGTGCATGTTTTTTTTTAAGTGAAA\n+TAAAACAAAAACAGCAATAACAGGCCAAAATGTTGTTTTATAAATAAATA\n+CTTACATATCCGCATATAAATATGAATGAAAAAAAAAACAGCAAGACTGG\n+GCCAAAATATTGTTCTTTAATATGTAAATAAATACATATCTACATATACA\n+TATGTGGACAAAAGTGTTAAAATTAGTTAATAGTGCATGTTTTTTAAATG\n+AAAAAATAAAACAAAAAAAGCAAGCAAATATAAATTATATTCCACACACA\n+TTTACAATAATTTCAAACTACGCTAAAAATACCCAACAGAAAAAACATAG\n+AAAGGTAAACAAAATACAAGAATATCAAAAAAAATCTAATTTTCACGTTG\n+TTGTGGCCGGTACCACCGGTCATACATACATATAGATACATATTTAGATT\n+CTCACAAAAAGAATAGAAAATTACCGCTAATTTCCTTATATACGTTTTTG\n+TTTTCAGTGTTTTGAAATAGTATTTTTCAAACAAAAAACAATACAAAAGG\n+GTTAAAAAAATTGTTCGATTGGCTTTAACATACT
AATTAAAAAAAATCAC\n+AATTTTTTGGCAAAATATCGAAAAAAATTAAGTGTTTACATTTTTATGCG\n+AATTATTTGGAAATTTAAATAAGAACCCAACGCGGTTTGTCACTCTACAT\n+ATGGACAATTTGTGGCTTTGTTTTGAACAAAAAATGAATTTTTTTTTACT\n+CAAAATTAACAAGAAGAAACAGTAATGCGGGTTTTGATCGCAATTCTGCT\n+AAATCGAATTTCCGTGCCCAAAGGCTGACCTTTTTGTGCAGCAAATAGCC\n+CAAGCTAATAATCTACAGCAATTTCTTAAAGACCGAAAATGTATCAACTT\n+CGGCAAAATATGAAAATTTAACATTTATTTTTCAACATTTTTTGCCAAAT\n+AATCGACAAAAATTTCAGTGTCCCGATTTGGATGCGAATTAATTGAGGAC\n+AATTGTGACTTTGTTTTGTAAAAAAACTATTTTTTTTACTGAAAATTACA\n+ATCAAGAATCTAAGGCCAAATCAGAATATTTTCACTGCGGTTTTTTGATC\n+GCAATTCTGCTAAATCGGGTTCCGTGCCAAAAAGCTGACTATTTTTTTTA\n+CTGAGAATTAGCATCAAGAATTTATTGCCAAAACAGAACGCAATTCTGCT\n+AAATCGGGTTGCCCTGCCAAAAGGCTGGCCGTTTTGCGCATTCAATAACA\n+CAAGCTAACGGTTTACAGCACATTCGGAAAGATCCAAAAGAATTTCTATT\n+TCGGCATTTTTTATGTATTATCGTCTAATTTTGGTATGTGTATTCATTCT\n+TTTGGCTGTCCGTTATTAAGCTGCTTTTCTTATTCACATATTCAATGGCG\n+CAGTACATAAAAAGAATGAACCGTGGTTTTCTTGCCCTCTCCATCTCCCT\n+CTACCACTTCCCCTCTCAACAACTATCTCACGCACTCTGTCCCTTCTCGT\n+CTCTGTCACCCTCTCTTTACCTACCTCACTCTGGCACACTCTCTCTACCT\n+ATCTCACTCTGTCACCTTCTTCTACCTAATTCTCTCTGTCGCGCTCGCTC\n+TCTCAACATGCCTTTCGCACCGTATCTCTTCGTTTCTCTGTATGCTTGCT\n+CTCTTAATTTAAGTTTCTCTTTCTTTGTGTTCAATATCTTTCGCGCTCGT\n+GACTCCTATTGACCGGACTCTGAGCGGTGCTCATTAAGAGTGGAGTTTTC\n+GATGATGGCGGGTGGCGCGAACGAGAATTTTTTACATAAAGGTAAGTTGC\n+CAATTCTTTTTTAAATGTGACATACAGCATGCATATTTTATTTATTGTAA\n+TTATATGTGAAAGAATAAAATTATATGTACTTTTATCTAATCTATTGCAT\n+CTATTTTTTCACAGGTAAACGAGTCACACGAGAAGTAAAACACAAACACA\n+AGGATCAAAATAAAATTCAGGTGAGTGAACGAACGTGTTGAAAGTAGTTA\n+ATAGTGCATGTCTATTTAAGTGTAAGAAACAAAAAAGACACAAATGGGCC\n+AATAATTCGTTTCTTTAATAGATAACTGCCTATATTATGTTCAAACTATG\n+CTAAAAAGACCCAAAAGGAAAACATAAAGTATATACCTTGCAAAATGAAT\n+AAAAAGAAATCATATATATTCATATACGTTTGACCGGTACCACCGGTCAT\n+ACATACATAAACATAAACACAAAAAGACTAGAATATAAATACCATATTTT\n+TGATTGGCTTAAAAATGGCACAAAAAAAAGATAAAAACTAGAGTGTCTAG\n+CTTTAAGTGACAATCGACTCGGAAAATGATCAAGAAATAAATGAAAAATT\n+TGTCTTGCGGCTTTTTGGTCGCAATTCTAAATCGGTTTGTATGTATGTCA\n+CTCTTCATGTATACAATTTGGTGGCTTTGTATTCCACAAAAGTCACAGTA\n+GTCACAAAAGTTTTTTTTAC
TGAAAATTACCATCAAGAGTCTATGGCCAA\n+ATCAGAATATTTTCACTGCGGTTTTTTATTTCAATTCTGCTAAATCGGGT\n+TTCCGTGCCAAAAAGCTTTTGCAGCCAACAACAATATTCAATTTTTTGGC\n+AAAAAATCGACAAAAAAATT\n'
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/augustusDbia3.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/augustusDbia3.gff3 Wed Jul 13 13:36:37 2016 -0400
[
b'@@ -0,0 +1,9513 @@\n+##gff-version 3\n+# This output was generated with AUGUSTUS (version 3.1.0).\n+# AUGUSTUS is a gene prediction tool written by Mario Stanke (mario.stanke@uni-greifswald.de),\n+# Oliver Keller, Stefanie K\xc3\xb6nig and Lizzy Gerischer.\n+# Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008),\n+# Using native and syntenically mapped cDNA alignments to improve de novo gene finding\n+# Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013\n+# No extrinsic information on sequences given.\n+# Initialising the parameters using config directory /home/galaxy/galaxy/dependency_dir/augustus/3.1/iuc/package_augustus_3_1/24009970003a/config/ ...\n+# human version. Using default transition matrix.\n+# Looks like /home/galaxy/galaxy/database/files/000/dataset_2.dat is in fasta format.\n+# We have hints for 0 sequences and for 0 of the sequences in the input set.\n+#\n+# ----- prediction on sequence number 1 (length = 45179, name = contig1) -----\n+#\n+# Predicted genes for sequence number 1 on both strands\n+# start gene contig1.g1\n+contig1\tAUGUSTUS\tgene\t641\t23169\t1\t-\t.\tID=contig1.g1\n+contig1\tAUGUSTUS\ttranscript\t641\t23169\t.\t-\t.\tID=contig1.g1.t1;Parent=contig1.g1\n+contig1\tAUGUSTUS\tCDS\t641\t5409\t.\t-\t2\tID=contig1.g1.t1.cds;Parent=contig1.g1.t1\n+contig1\tAUGUSTUS\tCDS\t13455\t13545\t.\t-\t0\tID=contig1.g1.t1.cds;Parent=contig1.g1.t1\n+contig1\tAUGUSTUS\tCDS\t17461\t17543\t.\t-\t2\tID=contig1.g1.t1.cds;Parent=contig1.g1.t1\n+contig1\tAUGUSTUS\tCDS\t23019\t23169\t.\t-\t0\tID=contig1.g1.t1.cds;Parent=contig1.g1.t1\n+contig1\tAUGUSTUS\tstart_codon\t23167\t23169\t.\t-\t0\tParent=contig1.g1.t1\n+# coding sequence = [atgcacattcagttgtctactctcgcctttcaggttcagactgcagatgagttcgtcgtcgatgcgcgtgatagacgat\n+# ccttcatccagcgcaggtgtcgacctgcgaattctctccgtacagcgtcacgggaagaatccggaataggagctgtaggtgttgctgataaatgtagt\n+# gcggatgcagaagttttaccgctgcattttgttggtgcgccaggttcgaaactgatgtcgagtcatgcaattgcttttcctataagtagccgagaatt\n+# 
gaacttacagatatctctaagatttttaatatttgcttacgaatacgctagcgcatgcgaaatttacagccttccgcggtttccatcaacgcctcaat\n+# ttgtcgcagttccattggcggctaatgatgaaaatgcatcaacatatgcatttgtaggtcctgcacgatattcatggaaagaagaggatattttatat\n+# gtgggaacgacgttcacgaacgttggtgattatcgccatgacgttcctgccatttcgtcccgtcggcttgatgatttaaactacgcagagttttcaat\n+# acagcagtcaattataaatatcgatgtcaaatatcgggatcatttcttagtcgattatgtttatggctttaactcttctgaatatgcgtactttgtta\n+# ttgttcaaaaaaaatcacatttagctgatgaggcaggttatgtaacccgtttggctcgaatatgtattacagatcccaattatgacagttatactgaa\n+# ataacagttcagtgtacggccactgaaaatcatattgactacaatatactacgcgatgccaaagtaactccggcaagccaaaaattagctcagaaaat\n+# gggtataaaaaaggacgatcacgtgttagtaactgttttttcgccctcgaaagagataagcgatcagccagaaagcaaatcggctatgtgcatatata\n+# gcataaaagacattgaggacatgtttattgaaaatattcatctgtgctttaacggaaccataaaggatagaaatttgggttatatatcgggcactatc\n+# aatgacggccggtgcccaatagttggctcgctcggtaacatatacaacttttgttctgtaggacttaagataagcggagtttctcctatcactacaca\n+# cgctctctttcattttgataatgtatcagttacgtcaataactgcaacgtcaacgactgatcagcagcattctcttgcttttcttggaaccgacaagg\n+# gattgataaaaaaagttttattatctggtcagaatccaggtgagtacgaagaaatagttgtggatgctggaaatcggatactaccaaacactatgatg\n+# tcgcccaaaaaagatttcctttacgttttatcgcaacgtaaaataactaaactcagaatcgagcattgttctgtatacacaaattgttcagcttgctt\n+# ggagtctcgggaccctttttgtggatggtgttcattggaaaaacggtgcaccgtgcggtcaacatgtcagcgagatacgtcagcatcgcgatggcttt\n+# ctttgggcagtgggcaacagtgtattgagtttgaatcaattatccctgagaaaataccaattactgatctaacacacctgcacctaataattcgaaca\n+# ctgcccgaaccttttaatgcaaaataccgatgtgtctttggaaactctacccctattgacgccgaaatcctggacaatggactcggatgtgctacccc\n+# cccactagatgaaagaccagtaataccaactaatacagaccatgttttggtgccattgtccgttagaagttcagagacaaataaggactttgtatcaa\n+# gattttttgcattctttgactgttcgcatcatggaaattgccaggaatgtttacaaagttcatggggctgcaactggtgtatttttgacaataaatgt\n+# gtccatcaatcaatacaatgtcgtaatatagaaaattcggtaactagtgttggtcaatgcccccatttaaaaagcaatcgtccggcgattcttttacc\n+# ggtgcgggtgccaaaagaaattcgtttagagatagaaaacttaccaaaacccaaaagcgctcacgctggattcttgtgtacagttcatattgaagctg\n+# 
ctcagatgctattgcctgcccacattgagtcaaacaagattgttgtttgtgaaaaaacaccttatttctacgagactaatacacatgaataccaagca\n+# aaggttgtaattacatggaatttccagcactatgtggacacggcgattgttaca'..b'=contig70.g235\n+contig70\tAUGUSTUS\ttranscript\t24505\t50605\t.\t+\t.\tID=contig70.g235.t1;Parent=contig70.g235\n+contig70\tAUGUSTUS\tstart_codon\t24505\t24507\t.\t+\t0\tParent=contig70.g235.t1\n+contig70\tAUGUSTUS\tCDS\t24505\t24577\t.\t+\t0\tID=contig70.g235.t1.cds;Parent=contig70.g235.t1\n+contig70\tAUGUSTUS\tCDS\t27274\t27411\t.\t+\t2\tID=contig70.g235.t1.cds;Parent=contig70.g235.t1\n+contig70\tAUGUSTUS\tCDS\t28524\t28636\t.\t+\t2\tID=contig70.g235.t1.cds;Parent=contig70.g235.t1\n+contig70\tAUGUSTUS\tCDS\t33015\t33225\t.\t+\t0\tID=contig70.g235.t1.cds;Parent=contig70.g235.t1\n+contig70\tAUGUSTUS\tCDS\t38419\t38560\t.\t+\t2\tID=contig70.g235.t1.cds;Parent=contig70.g235.t1\n+contig70\tAUGUSTUS\tCDS\t45726\t46035\t.\t+\t1\tID=contig70.g235.t1.cds;Parent=contig70.g235.t1\n+contig70\tAUGUSTUS\tCDS\t46098\t46478\t.\t+\t0\tID=contig70.g235.t1.cds;Parent=contig70.g235.t1\n+contig70\tAUGUSTUS\tCDS\t46932\t47068\t.\t+\t0\tID=contig70.g235.t1.cds;Parent=contig70.g235.t1\n+contig70\tAUGUSTUS\tCDS\t47124\t47263\t.\t+\t1\tID=contig70.g235.t1.cds;Parent=contig70.g235.t1\n+contig70\tAUGUSTUS\tCDS\t50403\t50605\t.\t+\t2\tID=contig70.g235.t1.cds;Parent=contig70.g235.t1\n+# coding sequence = [atgattaaaaaaagtttagagacggcagcgcttgtaaattacacccgtctctccaataaagctaagattgaaggcactt\n+# ttccgttcaagggacacgtgccagggaaatcctatcggaatttcaaaatttttccaagcactttcctgtacacgggaaacgtcccagggaaagcctat\n+# cggaaaggtcccagggaaatcccgtcggatattcaggatttgcgaggagaggttattgttccgcccccaaaaaaactagaggacctaattcacttagc\n+# agaactttgtgttgatctgttgcaacaaaatgaagagcactatggagaactgcgcaaacatgacaaaatggataaaattaaaatgcgtaaggaagatg\n+# atgatgtaccaaaaggccacaatgaaagcgatattgatttaaccgccaatactggactcagtagcacatcagacctggcttctgcagcatcaactaat\n+# ggatcgtcatttcgttattgtatgccgacacatgcagtatacaccacgccagtaccaacggcatataatttgaggaatgggcgttttcatcagcacct\n+# 
gcgagacacattcgcaccattagtcgtgcggtacgtggatctgatggaatcttcaatagctcagtcgattcataagggatttgaaaaagaacgctggg\n+# aaagtaaagggaacggatgtgccacctctgaagacttattttggaaactagatgctctacagtcgtttataagagacctgcactggccagacgcagag\n+# tttcgacaacatttagaacagcgtcttaaaatgatggccgtcgatatgatagagcaatgtatacaacgaactgattcgtcttttcagtcgtggctaaa\n+# aaaaaacattgccttcatatcaactgattatattttaccttcagaaatgtgcgctatggtcaatgtgatattagatgctaaaaatcaaagctttaaat\n+# tgactactattgacggcattgatttgtataaatttcatgcaaaaattgacgaccaaatcgacaaagcgaatgtagctatgacacaaggtctaactggt\n+# aaacttatgtcagtgctagagtcgactttgtcaaaattagcacgatacgacgaaggtagcctaatcggctcgattcttagttttacaaatgtatcgag\n+# ctcgggaaaggatctcgggcaaggatatgtaaatttctttagaaataatatggatcaagtacgaggaaaaattggcgacgatttatggaccctgaatt\n+# tctttgagcagtggtactcgcagcagattaacatgctatgtaattggctttcggaacgtttggaccacgctctgcactacgctcaagtttcatctatt\n+# tctcacattatcaagaaaatatattcagacttcgaattacaaggtgtattagaagataaattaaactctaaagcatatcaagcagtcgcacagcgaat\n+# ggcgacagaggaagcgacatgtgctttgacaatgcctgatgttagcgaagatgaaccctgtgacgacattcgagaaggggaagaagaagatactggcg\n+# acgaatctacctctaacataccaaggggcttaccaaaaccaaaaattgctgccgctcaagctgctgctgttaccaacgttgttgccggccgtgtggca\n+# accggaacatcaccagcaggtagtgccacattaatccgccttgaccagcaacaacaagagcgatacagacggcagcaggatcagttgcacgatgagca\n+# gcaagaaagccatgcccgacaaaagcaggaacttgctcttgcatcttacaccccagggacggtcgtcaatggattgggtcaatcatcagtcacggcaa\n+# gttag]\n+# protein sequence = [MIKKSLETAALVNYTRLSNKAKIEGTFPFKGHVPGKSYRNFKIFPSTFLYTGNVPGKAYRKGPREIPSDIQDLRGEVI\n+# VPPPKKLEDLIHLAELCVDLLQQNEEHYGELRKHDKMDKIKMRKEDDDVPKGHNESDIDLTANTGLSSTSDLASAASTNGSSFRYCMPTHAVYTTPVP\n+# TAYNLRNGRFHQHLRDTFAPLVVRYVDLMESSIAQSIHKGFEKERWESKGNGCATSEDLFWKLDALQSFIRDLHWPDAEFRQHLEQRLKMMAVDMIEQ\n+# CIQRTDSSFQSWLKKNIAFISTDYILPSEMCAMVNVILDAKNQSFKLTTIDGIDLYKFHAKIDDQIDKANVAMTQGLTGKLMSVLESTLSKLARYDEG\n+# SLIGSILSFTNVSSSGKDLGQGYVNFFRNNMDQVRGKIGDDLWTLNFFEQWYSQQINMLCNWLSERLDHALHYAQVSSISHIIKKIYSDFELQGVLED\n+# KLNSKAYQAVAQRMATEEATCALTMPDVSEDEPCDDIREGEEEDTGDESTSNIPRGLPKPKIAAAQAAAVTNVVAGRVATGTSPAGSATLIRLDQQQQ\n+# 
ERYRRQQDQLHDEQQESHARQKQELALASYTPGTVVNGLGQSSVTAS]\n+# end gene contig70.g235\n+###\n+# command line:\n+# augustus --strand=both --noInFrameStop=false --gff3=on --uniqueGeneId=true --protein=on --codingseq=on --introns=off --stop=off --stop=off --cds=on --singlestrand=false /home/galaxy/galaxy/database/files/000/dataset_2.dat --UTR=off --genemodel=complete --species=human\n'
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/augustusOutput.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/augustusOutput.html Wed Jul 13 13:36:37 2016 -0400
b
@@ -0,0 +1,1 @@
+<html><body><p>The following generated by Hub Archive Creator:</p><ul><li><a href="./myHub.zip">./myHub.zip</a></li><li><a href="myHub/dbia.html">myHub/dbia.html</a></li><li><a href="myHub/hub.txt">myHub/hub.txt</a></li><li><a href="myHub/genomes.txt">myHub/genomes.txt</a></li><li><a href="myHub/dbia3/dataset_7.2bit">myHub/dbia3/dataset_7.2bit</a></li><li><a href="myHub/dbia3/groups.txt">myHub/dbia3/groups.txt</a></li><li><a href="myHub/dbia3/trackDb.txt">myHub/dbia3/trackDb.txt</a></li><li><a href="myHub/dbia3/description.html">myHub/dbia3/description.html</a></li><li><a href="myHub/dbia3/tracks/augustusDbia3.bb">myHub/dbia3/tracks/augustusDbia3.bb</a></li><ul></body></html>
\ No newline at end of file
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/dbia3.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dbia3.fa Wed Jul 13 13:36:37 2016 -0400
b
b'@@ -0,0 +1,66784 @@\n+>contig1\n+AAAACTAATTTTATCAAAATCGGACAACTATATCATATAGCTGCCATACG\n+AACGATCGGAAAATTGGTAAGTAAATAATTAAAAATATTATATCTTTGGT\n+GTGTTTTTGGAAATAACATTTTTTAATATGTTTTGAATTTTGAATTAAAT\n+TCTATCAAAATCGGACGACTATATCATATAGCTGCCATACAAACGATCGG\n+AAAATTGGTGAATAAATAATATGAAAAAATTATATCTTTGGTTATTTTTT\n+AGACAAATAACCTCCAACGCTTGGAAATAACATTTTTTAATTAGTTATAA\n+ATTTGATATTTTAATTTTATCAAAATCGGACGAATATAGCATATAGCTGT\n+TAAAATAATATGAAACAAATTATAGCTCCGGTGTTTTTTACATATTATCT\n+TATACTATTGGGAAAATAGTTTATTATATTTTAAAGAATTTCCAATTAAA\n+CTCTAACATATAGCTTTCAAAGAAACGGTCAAAAAAGTAAAGAAATCATT\n+TTTTTTTAACATCACTGAAGCTAGAAACAATCCTTAAAAATGTAACATGG\n+TGTTAGTAGCATTGAAAATTGCTTATAACTGCAAAGGGAAAACAAACATC\n+GGCTTGCCGAATGTAATTTCCATTCTTGTTTGACTTGAGTTTATAACTTA\n+CAATTATGGTAAGGTGCCTGATTTTGGTTTTTGCCATACGGTATGTGGTA\n+GTTATCATTTTGCCGTGCTATAAGTGACATCACAGCCGATTCTTTTACTC\n+CATAATGGGCCAATGTGTTGAGGCGTTTCCATCCATTTATTGTTTTTGTA\n+GTTAGGTCTTCATCTTGTAATGTGAGATGACCTCCTCTTCCATGCCTCCA\n+TTCTAAGTCAACTTCGTGCACAGATGGTCTCATAGAAAACGGAGTATTTT\n+TAAAAATTGCGTCCAAAATTTTTAATTTAACTTGCGATATGGTATCCAAA\n+TCGTTTACACGACATTGGACTTTTTCATCAAGATCATCTTGTAAAATGTG\n+CAAAATTACAACAGAATGGGTAACTTGTTCGTGGAGAAGTCGTTCCTCTG\n+AAAGTGAATAACGAGCATCGTGGGTTATTGCGTCCACCAAACCCTTTTCA\n+ATTTGATGCTTAATTGCCTTGAACAGCAAAAATAAATTAGATCCAGCATA\n+TTCTTTTAGGTAGTCGTACATACAAATTGCTAAGTAGTTTGTTAACATTT\n+TTTCAACTACGCTCTCAGTGCGTCGTAGCATTAGCTGAGGATGCTTGCTG\n+GCGAGCGATTTGTCAATTAATCGCAATAAAAGGGACTTTAAAATTTCCGT\n+CGCATATTCCATTTTGTTCATTAGAACAACCATAAGTAAAGAGGCGACGT\n+TAACTCGATCGCGAATTGAGAAAGATGACCGTTGAGCTTCTAAAGTTTCT\n+ATGAACAATAGTAAAAAATATTTGTTTCCAATAAGTTGCTCGAATTGTAT\n+CATAGCTGCATCATAGTTAGTGTGCGGGCTACTTCCACAAAATTTTCGGG\n+AGTTTAGAATAGGATGATCTGATACACCGGGAAAGAAAACTTTCATAATG\n+TAATTGACGTGATCTAACGTTGGTATACCGGTGCTCTCCAAATCTGCTGT\n+TAGATCGGTCATGTCCGTTTGGAGCTCAGCAAATGCCTGTTTACACTCAG\n+AACGAACGTTGCTTTCCAATGTTATCATCTGTATCTGAATTCGTTTGTAT\n+TCCCGTTCTGCTTGGGTCGATTTCCTTCTAAATATTATTAGTACGACAAC\n+CAAAACGATGACAAGCACTGCAACTGTCAATATAACGACAAACATGGCAT\n+GTGAAAAAACATAGGGTTTATTTAAATCATATTTCAAATATCCTATGGCG\n+AAAC
GAAGATTTCGCCCTACTTTAACCACAACTAGAGGTAAATCTGTCGA\n+TTGATCCACACCATTTTCATCAGTTGGGAGTGGTTGATGTTCCGGTGGAA\n+TGCACAAAAGTTGAGTTAGTGTAAGGCTTGTTATATTGCATTGAGTAGTA\n+CCAATGGTTACATTAACGTCGTATTCATCAGCTGCCAAATTTAGTAGCTC\n+GCCTTCAATAACCAAGCTGTCACCCTTGTATAGTTTAATTCCGTCATTTG\n+GAAATGGTAAATATTTTGGATCGGCTAAATAAACAATAGTGCTCCTGATA\n+TCATGAAAGTATTTATTTAGATCTCGAACAAGCTGTACATTGTCCATTAC\n+AAAACTAAGTTGTAAATTCAGTTGCGTTTCATGGACCTTTACAAAAGTTG\n+TAACATCCATATTATTATTGACGAAATAGCTAGGCGCAGTTCCCCCTGTT\n+GTGTATATATGAAAGTTGTCTACGAAGTCTGCTTTTCGCTTTCGTCTAGA\n+ATATTCGTTCTGATGCGCACTTTCATTTTTAAAGTCAAATTCCGAATCCA\n+TCACTCTGTTAGAACTTTTAAATGCTTGAAATTTATCATTTACCGGAGGA\n+GATGGGCATTCCATTTGATTTGAATTGATTACTACGCAAGATGTTTTGTT\n+TACTCTTTCATTATCAAAAAAAACTTCAAGTTCAGGCTTTTGAATTGAAT\n+TAAGATACATTCCATGAACGGTCAGAACGCGACCACCACTTACAAAACTG\n+CGTAAAGGCTTAATCTGCATTATACGCGGGTCTTGGGTATAGTTAAAAAT\n+AGAACAGGGCTGCCTTGGTAACGTACGAAATTGGTATGAACCGAAATTAC\n+TTCGAGTTAGATTTTTATTGGGTATAGTTGCGGGTGAGATATGACATTCT\n+AAAGTTCGGTTGGCACCGTCAATAAGAAGGTGTAGAGACCGTATTGGTTC\n+TGGCTGGGTCGCCTCTGATGTAGTACTACTAACTTGAGTTGAAGAAGCTT\n+GTGTAACGTTTATATGACACTCGTACTCATCTAAATATGCTCGCATGGTT\n+GATCCAATGTTCAAAAACTTTCCTATTAACGACAATTGTGTTCCTCCCGA\n+CCTGGGCCCAATCGTGGGATATAATCCTGTTAGCAAAACATTCTTAAAGT\n+GAAATTGTACACTAGATTCAGTATACCCTGCATCATTTGCAACCTTTATA\n+GGCGCTGACATTTCATACATCACTGCTCCAGTGCGGCATTCGATCTTTAC\n+AGAAATTTGGTAGTTGACTAGTTCACAAGGCACAGACCCAATAAATATTT\n+TTCCACGCACATCTCCTTCGCGAATGCCCAAATTACTGCCTTCAATTGTA\n+ATAAGGGTACCACCCTCAATAGGTCCAGATAACGGTTTAATAATATCGAT\n+CCTTGGTAAAGGGCACTCGTTTTCTTTAGTTGTTTTTGATCCTGCACTAA\n+TAGCACGATCACTGTCTGCTATGCAAGTCTCGTTATATACACATGAGTTG\n+CTGCACCAAGCACATTGGTATTTTGGATCACGAGTGACACACAAACTGCA\n+ATCAGCATGATCCCGATGTGAGCCCAGTACGTCGCATTTATATAATGTAA\n+CAATCGCCGTGTCCACATAGTGCTGGAAATTCCATGTAATTACAACCTTT\n+GCTTGGTATTCATGTGTATTAGTCTCGTAGAAATAAGGTGTTTTTTCACA\n+AACAACAATCTTGTTTGACTCAATGTGGGCAGGCAATAGCATCTGAGCAG\n+CTTCAATATGAACTGTACACAAGAATCCAGCGTGAGCGCTTTTGGGTTTT\n+GGTAAGTTTTCTATCTCTAAACGAATTTCTTTTGGCACCCGCACCGGTAA\n+AAGAATCGCCGGACGATTGCTTTTTAAATGGGGGCATTGACCA
ACACTAG\n+TTACCGAATTTTCTATATTACGACATTGTATTGATTGATGGACACATTTA\n+TTGTCAAAAATACACC'..b'CCACTGGACCCGTTGGATAGCGTC\n+GGCACTTTTCTGTTTGGTGCCATTCACTGGTCACTAGTAGGACCGCTCTG\n+CCTATTAAAATAGGAGTGCTGCTTAGAATTTATTTCAGCAAGACAGCCGT\n+AAGATATATATGAATAGTATTTAACGGCAGTGGCAGAGGCCTTTACAATA\n+TAAGGCTTTACGGGATTTTAAACAATGAAAAACAAAAATATTTTTATAGA\n+GTCGAGATAAAATACTTTTTACAATAAATACTAAGTATTTTTAAAAGATA\n+AAAATATTTAAAAGTAAATACTGAATATTTTAACAGGTGTTAAGTACTGG\n+GAACTGTGACTAAGGACGGCGGTATATCGCTACCATAATATTTTTAAAGG\n+AACTGAATACTGAAAGATGTGACCAAAATGGTAGAATATCGCTAACGAAA\n+ATCACAGACGCAAATATCGATAGTGGCCTAGAGACCCGATATCGCCGCCG\n+TGATTATCGATGAAACTACAGCTGAGCTGCTTGTTTACATTCTAAAGTTA\n+AAATTTTTAAAAATTTCCAATACAAAAAAAAATTCTTCGTATTAACACGG\n+CTGTTTGCCGGTAAGATCTGAAGTGGATTCAAGTTGCAGCACCCTCAGGA\n+AAAGAATAAATTATTATAATATAATAATATAATATAAAGAATTTAAATTC\n+GAACCAAACGCGGTATGTCACTCTTCATTTAGAGAATTTGTGGCTTTGTT\n+TTGTACAAAAAACTATTCTTTTTTCTTGAAAATTAGCATCAAGAATTTAT\n+TGCCAAAACAGAACGCAATTCTGCTAAATCTGGTTACCCTGCCAAATGGC\n+TGGCCGTTCTGTGCATTCAATAACACAAGCTAACGGCTTACGGCACATTC\n+GGAAAGATCCAAAAGAATTTCTATTCCGGCATTTTTTATGTATTATCGTC\n+CAATTTTTGTATGTGTATTCATTCTTTTGGCTGTCCGTTATTAAGCTTTT\n+CTTATCCACATAAGCTGACCAGCTGCCAAAAAGCCGACCGTTTTGTGCAG\n+CCAATAACCCAAGCTAACTATCAACAGCAATTTCGAAAAATCCAGAAGAA\n+GCTATCCTTCGGCAATTTTTTCTTATATTATAATTTTTTTGTCACAATTT\n+TTTGTCAAAAAATCGACAATAATTAAGTGTTTACATTTTAATGCGATTTA\n+ATTGGAAATTTGAATTCGAATCAAACGCGGTGTCACTCTACATATGGACA\n+ATTTGTGGCTACAAAATTAGTTGAAAGTGCATGTTTTTTTTTAAGTGAAA\n+TAAAACAAAAACAGCAATAACAGGCCAAAATGTTGTTTTATAAATAAATA\n+CTTACATATCCGCATATAAATATGAATGAAAAAAAAAACAGCAAGACTGG\n+GCCAAAATATTGTTCTTTAATATGTAAATAAATACATATCTACATATACA\n+TATGTGGACAAAAGTGTTAAAATTAGTTAATAGTGCATGTTTTTTAAATG\n+AAAAAATAAAACAAAAAAAGCAAGCAAATATAAATTATATTCCACACACA\n+TTTACAATAATTTCAAACTACGCTAAAAATACCCAACAGAAAAAACATAG\n+AAAGGTAAACAAAATACAAGAATATCAAAAAAAATCTAATTTTCACGTTG\n+TTGTGGCCGGTACCACCGGTCATACATACATATAGATACATATTTAGATT\n+CTCACAAAAAGAATAGAAAATTACCGCTAATTTCCTTATATACGTTTTTG\n+TTTTCAGTGTTTTGAAATAGTATTTTTCAAACAAAAAACAATACAAAAGG\n+GTTAAAAAAATTGTTCGATTGGCTTTAACATACT
AATTAAAAAAAATCAC\n+AATTTTTTGGCAAAATATCGAAAAAAATTAAGTGTTTACATTTTTATGCG\n+AATTATTTGGAAATTTAAATAAGAACCCAACGCGGTTTGTCACTCTACAT\n+ATGGACAATTTGTGGCTTTGTTTTGAACAAAAAATGAATTTTTTTTTACT\n+CAAAATTAACAAGAAGAAACAGTAATGCGGGTTTTGATCGCAATTCTGCT\n+AAATCGAATTTCCGTGCCCAAAGGCTGACCTTTTTGTGCAGCAAATAGCC\n+CAAGCTAATAATCTACAGCAATTTCTTAAAGACCGAAAATGTATCAACTT\n+CGGCAAAATATGAAAATTTAACATTTATTTTTCAACATTTTTTGCCAAAT\n+AATCGACAAAAATTTCAGTGTCCCGATTTGGATGCGAATTAATTGAGGAC\n+AATTGTGACTTTGTTTTGTAAAAAAACTATTTTTTTTACTGAAAATTACA\n+ATCAAGAATCTAAGGCCAAATCAGAATATTTTCACTGCGGTTTTTTGATC\n+GCAATTCTGCTAAATCGGGTTCCGTGCCAAAAAGCTGACTATTTTTTTTA\n+CTGAGAATTAGCATCAAGAATTTATTGCCAAAACAGAACGCAATTCTGCT\n+AAATCGGGTTGCCCTGCCAAAAGGCTGGCCGTTTTGCGCATTCAATAACA\n+CAAGCTAACGGTTTACAGCACATTCGGAAAGATCCAAAAGAATTTCTATT\n+TCGGCATTTTTTATGTATTATCGTCTAATTTTGGTATGTGTATTCATTCT\n+TTTGGCTGTCCGTTATTAAGCTGCTTTTCTTATTCACATATTCAATGGCG\n+CAGTACATAAAAAGAATGAACCGTGGTTTTCTTGCCCTCTCCATCTCCCT\n+CTACCACTTCCCCTCTCAACAACTATCTCACGCACTCTGTCCCTTCTCGT\n+CTCTGTCACCCTCTCTTTACCTACCTCACTCTGGCACACTCTCTCTACCT\n+ATCTCACTCTGTCACCTTCTTCTACCTAATTCTCTCTGTCGCGCTCGCTC\n+TCTCAACATGCCTTTCGCACCGTATCTCTTCGTTTCTCTGTATGCTTGCT\n+CTCTTAATTTAAGTTTCTCTTTCTTTGTGTTCAATATCTTTCGCGCTCGT\n+GACTCCTATTGACCGGACTCTGAGCGGTGCTCATTAAGAGTGGAGTTTTC\n+GATGATGGCGGGTGGCGCGAACGAGAATTTTTTACATAAAGGTAAGTTGC\n+CAATTCTTTTTTAAATGTGACATACAGCATGCATATTTTATTTATTGTAA\n+TTATATGTGAAAGAATAAAATTATATGTACTTTTATCTAATCTATTGCAT\n+CTATTTTTTCACAGGTAAACGAGTCACACGAGAAGTAAAACACAAACACA\n+AGGATCAAAATAAAATTCAGGTGAGTGAACGAACGTGTTGAAAGTAGTTA\n+ATAGTGCATGTCTATTTAAGTGTAAGAAACAAAAAAGACACAAATGGGCC\n+AATAATTCGTTTCTTTAATAGATAACTGCCTATATTATGTTCAAACTATG\n+CTAAAAAGACCCAAAAGGAAAACATAAAGTATATACCTTGCAAAATGAAT\n+AAAAAGAAATCATATATATTCATATACGTTTGACCGGTACCACCGGTCAT\n+ACATACATAAACATAAACACAAAAAGACTAGAATATAAATACCATATTTT\n+TGATTGGCTTAAAAATGGCACAAAAAAAAGATAAAAACTAGAGTGTCTAG\n+CTTTAAGTGACAATCGACTCGGAAAATGATCAAGAAATAAATGAAAAATT\n+TGTCTTGCGGCTTTTTGGTCGCAATTCTAAATCGGTTTGTATGTATGTCA\n+CTCTTCATGTATACAATTTGGTGGCTTTGTATTCCACAAAAGTCACAGTA\n+GTCACAAAAGTTTTTTTTAC
TGAAAATTACCATCAAGAGTCTATGGCCAA\n+ATCAGAATATTTTCACTGCGGTTTTTTATTTCAATTCTGCTAAATCGGGT\n+TTCCGTGCCAAAAAGCTTTTGCAGCCAACAACAATATTCAATTTTTTGGC\n+AAAAAATCGACAAAAAAATT\n'
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/glimmerHMM_output.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/glimmerHMM_output.gff3 Wed Jul 13 13:36:37 2016 -0400
b
@@ -0,0 +1,22 @@
+##gff-version 3
+##sequence-region contig1 1 45179
+contig1 GlimmerHMM mRNA 641 20329 . - . ID=contig1.path1.gene1;Name=contig1.path1.gene1
+contig1 GlimmerHMM CDS 641 5409 . - 2 ID=contig1.cds1.1;Parent=contig1.path1.gene1;Name=contig1.path1.gene1;Note=final-exon
+contig1 GlimmerHMM CDS 12541 12968 . - 1 ID=contig1.cds1.2;Parent=contig1.path1.gene1;Name=contig1.path1.gene1;Note=internal-exon
+contig1 GlimmerHMM CDS 14821 14944 . - 2 ID=contig1.cds1.3;Parent=contig1.path1.gene1;Name=contig1.path1.gene1;Note=internal-exon
+contig1 GlimmerHMM CDS 15123 15424 . - 1 ID=contig1.cds1.4;Parent=contig1.path1.gene1;Name=contig1.path1.gene1;Note=internal-exon
+contig1 GlimmerHMM CDS 17402 17543 . - 2 ID=contig1.cds1.5;Parent=contig1.path1.gene1;Name=contig1.path1.gene1;Note=internal-exon
+contig1 GlimmerHMM CDS 17886 17986 . - 1 ID=contig1.cds1.6;Parent=contig1.path1.gene1;Name=contig1.path1.gene1;Note=internal-exon
+contig1 GlimmerHMM CDS 20226 20329 . - 0 ID=contig1.cds1.7;Parent=contig1.path1.gene1;Name=contig1.path1.gene1;Note=initial-exon
+contig1 GlimmerHMM mRNA 22912 26939 . - . ID=contig1.path1.gene2;Name=contig1.path1.gene2
+contig1 GlimmerHMM CDS 22912 23136 . - 0 ID=contig1.cds2.1;Parent=contig1.path1.gene2;Name=contig1.path1.gene2;Note=final-exon
+contig1 GlimmerHMM CDS 23431 23705 . - 2 ID=contig1.cds2.2;Parent=contig1.path1.gene2;Name=contig1.path1.gene2;Note=internal-exon
+contig1 GlimmerHMM CDS 26810 26939 . - 0 ID=contig1.cds2.3;Parent=contig1.path1.gene2;Name=contig1.path1.gene2;Note=initial-exon
+contig1 GlimmerHMM mRNA 29101 41509 . - . ID=contig1.path1.gene3;Name=contig1.path1.gene3
+contig1 GlimmerHMM CDS 29101 29152 . - 1 ID=contig1.cds3.1;Parent=contig1.path1.gene3;Name=contig1.path1.gene3;Note=final-exon
+contig1 GlimmerHMM CDS 31365 33826 . - 0 ID=contig1.cds3.2;Parent=contig1.path1.gene3;Name=contig1.path1.gene3;Note=internal-exon
+contig1 GlimmerHMM CDS 33880 34044 . - 0 ID=contig1.cds3.3;Parent=contig1.path1.gene3;Name=contig1.path1.gene3;Note=internal-exon
+contig1 GlimmerHMM CDS 34107 34619 . - 0 ID=contig1.cds3.4;Parent=contig1.path1.gene3;Name=contig1.path1.gene3;Note=internal-exon
+contig1 GlimmerHMM CDS 34674 35311 . - 2 ID=contig1.cds3.5;Parent=contig1.path1.gene3;Name=contig1.path1.gene3;Note=internal-exon
+contig1 GlimmerHMM CDS 35384 35766 . - 1 ID=contig1.cds3.6;Parent=contig1.path1.gene3;Name=contig1.path1.gene3;Note=internal-exon
+contig1 GlimmerHMM CDS 41472 41509 . - 0 ID=contig1.cds3.7;Parent=contig1.path1.gene3;Name=contig1.path1.gene3;Note=initial-exon
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/tblastN/dbia3.xml.bb
b
Binary file test-data/tblastN/dbia3.xml.bb has changed
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/tblastN/dbia3.xml.sorted.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tblastN/dbia3.xml.sorted.bed Wed Jul 13 13:36:37 2016 -0400
b
@@ -0,0 +1,10 @@
+contig1 31366 33826 ci-PA 358 - 31366 33826 0 18 126,114,27,174,186,84,69,45,204,108,117,144,123,150,24,48,495,165, 0,126,240,270,444,630,726,801,846,1056,1167,1290,1440,1566,1722,1746,1794,2295,
+contig1 31366 33826 ci-PB 358 - 31366 33826 0 18 126,114,27,174,186,84,69,45,204,108,117,144,123,150,24,48,495,165, 0,126,240,270,444,630,726,801,846,1056,1167,1290,1440,1566,1722,1746,1794,2295,
+contig1 33768 34056 ci-PC 0 - 33768 34056 0 5 63,36,51,54,45, 0,75,123,180,243,
+contig1 33878 35765 ci-PC 584 - 33878 35765 0 8 99,510,210,432,84,63,48,171, 0,228,792,1002,1506,1596,1662,1716,
+contig1 33879 34044 ci-PA 964 - 33879 34044 0 1 165, 0,
+contig1 33879 34044 ci-PB 964 - 33879 34044 0 1 165, 0,
+contig1 34106 35435 ci-PB 560 - 34106 35435 0 4 510,210,432,51, 0,564,774,1278,
+contig1 34106 35765 ci-PA 604 - 34106 35765 0 7 510,210,432,84,63,45,174, 0,564,774,1278,1368,1434,1485,
+contig1 41470 41509 ci-PA 1000 - 41470 41509 0 1 39, 0,
+contig1 41470 41509 ci-PC 1000 - 41470 41509 0 1 39, 0,
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/tblastN/dbia3.xml.unbb.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tblastN/dbia3.xml.unbb.bed Wed Jul 13 13:36:37 2016 -0400
b
@@ -0,0 +1,10 @@
+contig1 31366 33826 ci-PA 358 - 31366 33826 0 18 126,114,27,174,186,84,69,45,204,108,117,144,123,150,24,48,495,165, 0,126,240,270,444,630,726,801,846,1056,1167,1290,1440,1566,1722,1746,1794,2295,
+contig1 31366 33826 ci-PB 358 - 31366 33826 0 18 126,114,27,174,186,84,69,45,204,108,117,144,123,150,24,48,495,165, 0,126,240,270,444,630,726,801,846,1056,1167,1290,1440,1566,1722,1746,1794,2295,
+contig1 33768 34056 ci-PC 0 - 33768 34056 0 5 63,36,51,54,45, 0,75,123,180,243,
+contig1 33878 35765 ci-PC 584 - 33878 35765 0 8 99,510,210,432,84,63,48,171, 0,228,792,1002,1506,1596,1662,1716,
+contig1 33879 34044 ci-PA 964 - 33879 34044 0 1 165, 0,
+contig1 33879 34044 ci-PB 964 - 33879 34044 0 1 165, 0,
+contig1 34106 35435 ci-PB 560 - 34106 35435 0 4 510,210,432,51, 0,564,774,1278,
+contig1 34106 35765 ci-PA 604 - 34106 35765 0 7 510,210,432,84,63,45,174, 0,564,774,1278,1368,1434,1485,
+contig1 41470 41509 ci-PA 1000 - 41470 41509 0 1 39, 0,
+contig1 41470 41509 ci-PC 1000 - 41470 41509 0 1 39, 0,
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/tblastN/dbia3.xml.unsorted.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tblastN/dbia3.xml.unsorted.bed Wed Jul 13 13:36:37 2016 -0400
b
@@ -0,0 +1,10 @@
+contig1 31366 33826 ci-PA 358 - 31366 33826 0 18 126,114,27,174,186,84,69,45,204,108,117,144,123,150,24,48,495,165, 0,126,240,270,444,630,726,801,846,1056,1167,1290,1440,1566,1722,1746,1794,2295,
+contig1 34106 35765 ci-PA 604 - 34106 35765 0 7 510,210,432,84,63,45,174, 0,564,774,1278,1368,1434,1485,
+contig1 33879 34044 ci-PA 964 - 33879 34044 0 1 165, 0,
+contig1 41470 41509 ci-PA 1000 - 41470 41509 0 1 39, 0,
+contig1 33878 35765 ci-PC 584 - 33878 35765 0 8 99,510,210,432,84,63,48,171, 0,228,792,1002,1506,1596,1662,1716,
+contig1 33768 34056 ci-PC 0 - 33768 34056 0 5 63,36,51,54,45, 0,75,123,180,243,
+contig1 41470 41509 ci-PC 1000 - 41470 41509 0 1 39, 0,
+contig1 31366 33826 ci-PB 358 - 31366 33826 0 18 126,114,27,174,186,84,69,45,204,108,117,144,123,150,24,48,495,165, 0,126,240,270,444,630,726,801,846,1056,1167,1290,1440,1566,1722,1746,1794,2295,
+contig1 34106 35435 ci-PB 560 - 34106 35435 0 4 510,210,432,51, 0,564,774,1278,
+contig1 33879 34044 ci-PB 964 - 33879 34044 0 1 165, 0,
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/tblastN/hubaInputs/tblastn_Dbia3_ci.xml.sorted.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tblastN/hubaInputs/tblastn_Dbia3_ci.xml.sorted.bed Wed Jul 13 13:36:37 2016 -0400
b
@@ -0,0 +1,50 @@
+contig1 31366 33826 ci-PA 358 - 31366 33826 0 18 126,114,27,174,186,84,69,45,204,108,117,144,123,150,24,48,495,165, 0,126,240,270,444,630,726,801,846,1056,1167,1290,1440,1566,1722,1746,1794,2295,
+contig1 31366 33826 ci-PB 358 - 31366 33826 0 18 126,114,27,174,186,84,69,45,204,108,117,144,123,150,24,48,495,165, 0,126,240,270,444,630,726,801,846,1056,1167,1290,1440,1566,1722,1746,1794,2295,
+contig1 33768 34056 ci-PC 0 - 33768 34056 0 5 63,36,51,54,45, 0,75,123,180,243,
+contig1 33878 35765 ci-PC 584 - 33878 35765 0 8 99,510,210,432,84,63,48,171, 0,228,792,1002,1506,1596,1662,1716,
+contig1 33879 34044 ci-PA 964 - 33879 34044 0 1 165, 0,
+contig1 33879 34044 ci-PB 964 - 33879 34044 0 1 165, 0,
+contig1 34106 35435 ci-PB 560 - 34106 35435 0 4 510,210,432,51, 0,564,774,1278,
+contig1 34106 35765 ci-PA 604 - 34106 35765 0 7 510,210,432,84,63,45,174, 0,564,774,1278,1368,1434,1485,
+contig2 16199 18659 ci-PA 358 - 16199 18659 0 18 126,114,27,174,186,84,69,45,204,108,117,144,123,150,24,48,495,165, 0,126,240,270,444,630,726,801,846,1056,1167,1290,1440,1566,1722,1746,1794,2295,
+contig2 16199 18659 ci-PB 358 - 16199 18659 0 18 126,114,27,174,186,84,69,45,204,108,117,144,123,150,24,48,495,165, 0,126,240,270,444,630,726,801,846,1056,1167,1290,1440,1566,1722,1746,1794,2295,
+contig2 18601 18889 ci-PC 0 - 18601 18889 0 5 63,36,51,54,45, 0,75,123,180,243,
+contig2 18711 20598 ci-PC 584 - 18711 20598 0 8 99,510,210,432,84,63,48,171, 0,228,792,1002,1506,1596,1662,1716,
+contig2 18712 18877 ci-PA 964 - 18712 18877 0 1 165, 0,
+contig2 18712 18877 ci-PB 964 - 18712 18877 0 1 165, 0,
+contig2 18939 20268 ci-PB 560 - 18939 20268 0 4 510,210,432,51, 0,564,774,1278,
+contig2 18939 20598 ci-PA 604 - 18939 20598 0 7 510,210,432,84,63,45,174, 0,564,774,1278,1368,1434,1485,
+contig23 23849 24035 ci-PA 0 - 23849 24035 0 3 51,21,96, 0,51,90,
+contig23 23849 24035 ci-PB 0 - 23849 24035 0 3 51,21,96, 0,51,90,
+contig23 32405 32564 ci-PA 0 - 32405 32564 0 2 63,96, 0,63,
+contig23 32405 32564 ci-PB 0 - 32405 32564 0 2 63,96, 0,63,
+contig23 32405 32573 ci-PA 0 - 32405 32573 0 4 21,42,90,15, 0,21,63,153,
+contig23 32405 32573 ci-PB 0 - 32405 32573 0 4 21,42,90,15, 0,21,63,153,
+contig23 32471 32564 ci-PC 0 - 32471 32564 0 1 93, 0,
+contig24 3846 4032 ci-PA 0 - 3846 4032 0 3 51,21,96, 0,51,90,
+contig24 3846 4032 ci-PB 0 - 3846 4032 0 3 51,21,96, 0,51,90,
+contig24 12402 12561 ci-PA 0 - 12402 12561 0 2 63,96, 0,63,
+contig24 12402 12561 ci-PB 0 - 12402 12561 0 2 63,96, 0,63,
+contig24 12402 12570 ci-PA 0 - 12402 12570 0 4 21,42,90,15, 0,21,63,153,
+contig24 12402 12570 ci-PB 0 - 12402 12570 0 4 21,42,90,15, 0,21,63,153,
+contig24 12468 12561 ci-PC 0 - 12468 12561 0 1 93, 0,
+contig66 33180 33312 ci-PA 0 - 33180 33312 0 1 132, 0,
+contig66 33180 33312 ci-PB 0 - 33180 33312 0 1 132, 0,
+contig66 33204 33303 ci-PA 0 - 33204 33303 0 1 99, 0,
+contig66 33204 33303 ci-PB 0 - 33204 33303 0 1 99, 0,
+contig66 35474 35663 ci-PA 0 - 35474 35663 0 1 189, 0,
+contig66 35474 35663 ci-PB 0 - 35474 35663 0 1 189, 0,
+contig66 35516 35762 ci-PA 0 - 35516 35762 0 4 81,51,21,93, 0,81,132,153,
+contig66 35516 35762 ci-PB 0 - 35516 35762 0 4 81,51,21,93, 0,81,132,153,
+contig66 35534 35756 ci-PA 0 - 35534 35756 0 4 111,24,51,36, 0,111,135,186,
+contig66 35534 35756 ci-PB 0 - 35534 35756 0 4 111,24,51,36, 0,111,135,186,
+contig67 18177 18309 ci-PA 0 - 18177 18309 0 1 132, 0,
+contig67 18177 18309 ci-PB 0 - 18177 18309 0 1 132, 0,
+contig67 18201 18300 ci-PA 0 - 18201 18300 0 1 99, 0,
+contig67 18201 18300 ci-PB 0 - 18201 18300 0 1 99, 0,
+contig67 20471 20660 ci-PA 0 - 20471 20660 0 1 189, 0,
+contig67 20471 20660 ci-PB 0 - 20471 20660 0 1 189, 0,
+contig67 20513 20759 ci-PA 0 - 20513 20759 0 4 81,51,21,93, 0,81,132,153,
+contig67 20513 20759 ci-PB 0 - 20513 20759 0 4 81,51,21,93, 0,81,132,153,
+contig67 20531 20753 ci-PA 0 - 20531 20753 0 4 111,24,51,36, 0,111,135,186,
+contig67 20531 20753 ci-PB 0 - 20531 20753 0 4 111,24,51,36, 0,111,135,186,
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/tblastN/readme/README.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tblastN/readme/README.html Wed Jul 13 13:36:37 2016 -0400
b
@@ -0,0 +1,36 @@
+<h1 id="conversion-of-ncbi-blast-tblastn-results-to-psl-format">Conversion of NCBI BLAST+ tblastn results to PSL format</h1>
+<p>Wilson Leung <script type="text/javascript">
+<!--
+h='&#x77;&#x75;&#x73;&#116;&#108;&#46;&#x65;&#100;&#x75;';a='&#64;';n='&#x77;&#108;&#x65;&#x75;&#110;&#x67;';e=n+a+h;
+document.write('<a h'+'ref'+'="ma'+'ilto'+':'+e+'" clas'+'s="em' + 'ail">'+e+'<\/'+'a'+'>');
+// -->
+</script><noscript>&#x77;&#108;&#x65;&#x75;&#110;&#x67;&#32;&#x61;&#116;&#32;&#x77;&#x75;&#x73;&#116;&#108;&#32;&#100;&#x6f;&#116;&#32;&#x65;&#100;&#x75;</noscript></p>
+<p>Last Update: 04/24/2016</p>
+<h2 id="version-information">Version information</h2>
+<ul>
+<li>Kent source tree: v324</li>
+<li>NCBI BLAST+: BLAST 2.2.30+</li>
+</ul>
+<h2 id="data-sources">Data sources</h2>
+<p>For testing purposes, the database consists of only contig1 in the Dbia3 assembly while the protein sequences correspond to the three isoforms of the <em>D. melanogaster</em> <em>ci</em> gene in contig1. The protein sequences are available through <a href="http://flybase.org/cgi-bin/getseq.html?source=dmel&amp;id=FBgn0004859&amp;chr=4&amp;dump=PrecompiledFasta&amp;targetset=translation">FlyBase</a>.</p>
+<ul>
+<li>Dbia3.fa = contig1 sequence in the Dbia3 assembly</li>
+<li>ci.pep = Protein sequences for the three isoforms of the <em>ci</em> gene in <em>D. melanogaster</em></li>
+</ul>
+<h2 id="conversion-protocol">Conversion protocol</h2>
+<ol style="list-style-type: decimal">
+<li><p>Create BLAST database for the assembly</p>
+<pre><code>makeblastdb -in Dbia3.fa -dbtype nucl</code></pre></li>
+<li><p>Perform tblastn search and output results in XML format</p>
+<pre><code>tblastn -outfmt 5 -db Dbia3.fa -query ci.pep -out tblastn_Dbia3_ci.xml -evalue 1e-2</code></pre></li>
+<li><p>Convert results into PSL format</p>
+<pre><code>blastXmlToPsl -convertToNucCoords tblastn_Dbia3_ci.xml tblastn_Dbia3_ci.xml.psl</code></pre></li>
+<li><p>Convert PSL output into BED format</p>
+<pre><code>pslToBed tblastn_Dbia3_ci.xml.psl tblastn_Dbia3_ci.xml.bed</code></pre></li>
+</ol>
+<h2 id="output-files">Output files</h2>
+<ul>
+<li>tblastn_Dbia3_ci.xml = tblastn results in XML format</li>
+<li>tblastn_Dbia3_ci.xml.psl = tblastn results in PSL format</li>
+<li>tblastn_Dbia3_ci.xml.bed = tblastn results in BED format</li>
+</ul>
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/tblastN/readme/README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tblastN/readme/README.md Wed Jul 13 13:36:37 2016 -0400
[
@@ -0,0 +1,48 @@
+Conversion of NCBI BLAST+ tblastn results to PSL format
+=======================================================
+Wilson Leung <wleung@wustl.edu>
+
+Last Update: 04/24/2016
+
+
+Version information
+-------------------
+* Kent source tree: v324
+* NCBI BLAST+: BLAST 2.2.30+
+
+Data sources
+-------------------
+For testing purposes, the database consists of only contig1 in the Dbia3 assembly while the protein sequences correspond to the three isoforms of the *D. melanogaster* *ci* gene in contig1. The protein sequences are available through [FlyBase](http://flybase.org/cgi-bin/getseq.html?source=dmel&id=FBgn0004859&chr=4&dump=PrecompiledFasta&targetset=translation).
+
+* Dbia3.fa = contig1 sequence in the Dbia3 assembly
+* ci.pep = Protein sequences for the three isoforms of the *ci* gene in *D. melanogaster*
+
+Conversion protocol
+-----------------------
+1. Create BLAST database for the assembly
+```
+makeblastdb -in Dbia3.fa -dbtype nucl
+```
+
+2. Perform tblastn search and output results in XML format
+```
+tblastn -outfmt 5 -db Dbia3.fa -query ci.pep -out tblastn_Dbia3_ci.xml -evalue 1e-2
+```
+
+3. Convert results into PSL format
+```
+blastXmlToPsl -convertToNucCoords tblastn_Dbia3_ci.xml tblastn_Dbia3_ci.xml.psl
+```
+
+4. Convert PSL output into BED format
+```
+pslToBed tblastn_Dbia3_ci.xml.psl tblastn_Dbia3_ci.xml.bed
+```
+
+Output files
+-----------------------
+* tblastn_Dbia3_ci.xml = tblastn results in XML format
+* tblastn_Dbia3_ci.xml.psl = tblastn results in PSL format
+* tblastn_Dbia3_ci.xml.bed = tblastn results in BED format
+
+
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/tblastN/workflowInputs/ci.pep
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tblastN/workflowInputs/ci.pep Wed Jul 13 13:36:37 2016 -0400
b
@@ -0,0 +1,45 @@
+>ci-PA FBpp0088245
+MDAYALPTYFPLAYSELQFLASRRAAAVAAAATVLPGSPCINQHHPTDVSSSVTVPSIIPTGGTSDSIKTSIQPQICNEN
+TLLGNAGHQHNHQPQHVHNINVTGQPHDFHPAYRIPGYMEQLYSLQRTNSASSFHDPYVNCASAFHLAGLGLGSADFLGS
+RGLSSLGELHNAAVAAAAAGSLASTDFHFSVDGNRRLGSPRPPGGSIRASISRKRALSSSPYSDSFDINSMIRFSPNSLA
+TIMNGSRGSSAASGSYGHISATALNPMSHVHSTRLQQIQAHLLRASAGLLNPMTPQQVAASGFSIGHMPTSASLRVNDVH
+PNLSDSHIQITTSPTVTKDVSQVPAAAFSLKNLDDAREKKGPFKDVVPEQPSSTSGGVAQVEADSASSQLSDRCYNNVVN
+NITGIPGDVKVNSRLDEYINCGSISIPSNEYDCANADTTDIKDEPGDFIETNCHWRSCRIEFITQDELVKHINNDHIQTN
+KKAFVCRWEDCTRGEKPFKAQYMLVVHMRRHTGEKPHKCTFEGCFKAYSRLENLKTHLRSHTGEKPYTCEYPGCSKAFSN
+ASDRAKHQNRTHSNEKPYICKAPGCTKRYTDPSSLRKHVKTVHGAEFYANKKHKGLPLNDANSRLQQNNSRHNLQEHNID
+SSPCSEDSHLGKMLGTSSPSIKSESDISSSNHHLVNGVRASDSLLTYSPDDLAENLNLDDGWNCDDDVDVADLPIVLRAM
+VNIGNGNASASTIGGSVLARQRFRGRLQTKGINSSTIMLCNIPESNRTFGISELNQRITELKMEPGTDAEIKIPKLPNTT
+IGGYTEDPLQNQTSFRNTVSNKQGTVSGSIQGQFRRDSQNSTASTYYGSMQSRRSSQSSQVSSIPTMRPNPSCNSTASFY
+DPISPGCSRRSSQMSNGANCNSFTSTSGLPVLNKESNKSLNACINKPNIGVQGVGIYNSSLPPPPSSHLIATNLKRLQRK
+DSEYHNFTSGRFSVPSYMHSLHIKNNKPVGENEFDKAIASNARRQTDPVPNINLDPLTNISRFSTTPHSFDINVGKTNNI
+ASSINKDNLRKDLFTVSIKADMAMTSDQHPNERINLDEVEELILPDEMLQYLNLVKDDTNHLEKEHQAVPVGSNVSETIA
+SNHYREQSNIYYTNKQILTPPSNVDIQPNTTKFTVQDKFAMTAVGGSFSQRELSTLAVPNEHGHAKCESFHHQSQKYMNT
+DIGSKQQSALPSAHQRQTEKSNYNQIIDSSMTSLPELNVDSIYPRNETENIFKVHGDHDNEIQCGIISQSQMSPSTNLNN
+DGQFSTVNMQPITTSKLFPPEPQKIVCDTQASNTSVMHLDTYQRTLEYVQSCQNWMETNNTSTNQIQSLPGMPVNNTLFP
+DVSSSTHPYHGTNMVINDMTTSLTSLLEENRYLQMMQ
+>ci-PC FBpp0300417
+MDAYALPTYFPLAYSELQFLASRRAAAVAAAATVLPGSPCINQHHPTDVSSSVTVPSIIPTGGTSDSIKTSIQPQICNEN
+TLLGNAGHQHNHQPQHVHNINVTGQPHDFHPAYRIPGYMEQLYSLQRTNSASSFHDPYVNCASAFHLAGLGLGSADFLGS
+RGLSSLGELHNAAVAAAAAGSLASTDFHFSVDGNRRLGSPRPPGGSIRASISRKRALSSSPYSDSFDINSMIRFSPNSLA
+TIMNGSRGSSAASGSYGHISATALNPMSHVHSTRLQQIQAHLLRASAGLLNPMTPQQVAASGFSIGHMPTSASLRVNDVH
+PNLSDSHIQITTSPTVTKDVSQVPAAAFSLKNLDDAREKKGPFKDVVPEQPSSTSGGVAQVEADSASSQLSDRCYNNVVN
+NITGIPGDVKVNSRLDEYINCGSISIPSNEYDCANADTTDIKDEPGDFIETNCHWRSCRIEFITQDELVKHINNDHIQTN
+KKAFVCRWEDCTRGEKPFKAQYMLVVHMRRHTGEKPHKCTEKNHTLASIRDVAKPLVMLVIAQSIKIEHTVMRNRTFVKH
+LDAQNVTPTRAL
+>ci-PB FBpp0297298
+MEQLYSLQRTNSASSFHDPYVNCASAFHLAGLGLGSADFLGSRGLSSLGELHNAAVAAAAAGSLASTDFHFSVDGNRRLG
+SPRPPGGSIRASISRKRALSSSPYSDSFDINSMIRFSPNSLATIMNGSRGSSAASGSYGHISATALNPMSHVHSTRLQQI
+QAHLLRASAGLLNPMTPQQVAASGFSIGHMPTSASLRVNDVHPNLSDSHIQITTSPTVTKDVSQVPAAAFSLKNLDDARE
+KKGPFKDVVPEQPSSTSGGVAQVEADSASSQLSDRCYNNVVNNITGIPGDVKVNSRLDEYINCGSISIPSNEYDCANADT
+TDIKDEPGDFIETNCHWRSCRIEFITQDELVKHINNDHIQTNKKAFVCRWEDCTRGEKPFKAQYMLVVHMRRHTGEKPHK
+CTFEGCFKAYSRLENLKTHLRSHTGEKPYTCEYPGCSKAFSNASDRAKHQNRTHSNEKPYICKAPGCTKRYTDPSSLRKH
+VKTVHGAEFYANKKHKGLPLNDANSRLQQNNSRHNLQEHNIDSSPCSEDSHLGKMLGTSSPSIKSESDISSSNHHLVNGV
+RASDSLLTYSPDDLAENLNLDDGWNCDDDVDVADLPIVLRAMVNIGNGNASASTIGGSVLARQRFRGRLQTKGINSSTIM
+LCNIPESNRTFGISELNQRITELKMEPGTDAEIKIPKLPNTTIGGYTEDPLQNQTSFRNTVSNKQGTVSGSIQGQFRRDS
+QNSTASTYYGSMQSRRSSQSSQVSSIPTMRPNPSCNSTASFYDPISPGCSRRSSQMSNGANCNSFTSTSGLPVLNKESNK
+SLNACINKPNIGVQGVGIYNSSLPPPPSSHLIATNLKRLQRKDSEYHNFTSGRFSVPSYMHSLHIKNNKPVGENEFDKAI
+ASNARRQTDPVPNINLDPLTNISRFSTTPHSFDINVGKTNNIASSINKDNLRKDLFTVSIKADMAMTSDQHPNERINLDE
+VEELILPDEMLQYLNLVKDDTNHLEKEHQAVPVGSNVSETIASNHYREQSNIYYTNKQILTPPSNVDIQPNTTKFTVQDK
+FAMTAVGGSFSQRELSTLAVPNEHGHAKCESFHHQSQKYMNTDIGSKQQSALPSAHQRQTEKSNYNQIIDSSMTSLPELN
+VDSIYPRNETENIFKVHGDHDNEIQCGIISQSQMSPSTNLNNDGQFSTVNMQPITTSKLFPPEPQKIVCDTQASNTSVMH
+LDTYQRTLEYVQSCQNWMETNNTSTNQIQSLPGMPVNNTLFPDVSSSTHPYHGTNMVINDMTTSLTSLLEENRYLQMMQ
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/tblastN/workflowInputs/dbia3.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tblastN/workflowInputs/dbia3.fa Wed Jul 13 13:36:37 2016 -0400
b
b'@@ -0,0 +1,66784 @@\n+>contig1\n+AAAACTAATTTTATCAAAATCGGACAACTATATCATATAGCTGCCATACG\n+AACGATCGGAAAATTGGTAAGTAAATAATTAAAAATATTATATCTTTGGT\n+GTGTTTTTGGAAATAACATTTTTTAATATGTTTTGAATTTTGAATTAAAT\n+TCTATCAAAATCGGACGACTATATCATATAGCTGCCATACAAACGATCGG\n+AAAATTGGTGAATAAATAATATGAAAAAATTATATCTTTGGTTATTTTTT\n+AGACAAATAACCTCCAACGCTTGGAAATAACATTTTTTAATTAGTTATAA\n+ATTTGATATTTTAATTTTATCAAAATCGGACGAATATAGCATATAGCTGT\n+TAAAATAATATGAAACAAATTATAGCTCCGGTGTTTTTTACATATTATCT\n+TATACTATTGGGAAAATAGTTTATTATATTTTAAAGAATTTCCAATTAAA\n+CTCTAACATATAGCTTTCAAAGAAACGGTCAAAAAAGTAAAGAAATCATT\n+TTTTTTTAACATCACTGAAGCTAGAAACAATCCTTAAAAATGTAACATGG\n+TGTTAGTAGCATTGAAAATTGCTTATAACTGCAAAGGGAAAACAAACATC\n+GGCTTGCCGAATGTAATTTCCATTCTTGTTTGACTTGAGTTTATAACTTA\n+CAATTATGGTAAGGTGCCTGATTTTGGTTTTTGCCATACGGTATGTGGTA\n+GTTATCATTTTGCCGTGCTATAAGTGACATCACAGCCGATTCTTTTACTC\n+CATAATGGGCCAATGTGTTGAGGCGTTTCCATCCATTTATTGTTTTTGTA\n+GTTAGGTCTTCATCTTGTAATGTGAGATGACCTCCTCTTCCATGCCTCCA\n+TTCTAAGTCAACTTCGTGCACAGATGGTCTCATAGAAAACGGAGTATTTT\n+TAAAAATTGCGTCCAAAATTTTTAATTTAACTTGCGATATGGTATCCAAA\n+TCGTTTACACGACATTGGACTTTTTCATCAAGATCATCTTGTAAAATGTG\n+CAAAATTACAACAGAATGGGTAACTTGTTCGTGGAGAAGTCGTTCCTCTG\n+AAAGTGAATAACGAGCATCGTGGGTTATTGCGTCCACCAAACCCTTTTCA\n+ATTTGATGCTTAATTGCCTTGAACAGCAAAAATAAATTAGATCCAGCATA\n+TTCTTTTAGGTAGTCGTACATACAAATTGCTAAGTAGTTTGTTAACATTT\n+TTTCAACTACGCTCTCAGTGCGTCGTAGCATTAGCTGAGGATGCTTGCTG\n+GCGAGCGATTTGTCAATTAATCGCAATAAAAGGGACTTTAAAATTTCCGT\n+CGCATATTCCATTTTGTTCATTAGAACAACCATAAGTAAAGAGGCGACGT\n+TAACTCGATCGCGAATTGAGAAAGATGACCGTTGAGCTTCTAAAGTTTCT\n+ATGAACAATAGTAAAAAATATTTGTTTCCAATAAGTTGCTCGAATTGTAT\n+CATAGCTGCATCATAGTTAGTGTGCGGGCTACTTCCACAAAATTTTCGGG\n+AGTTTAGAATAGGATGATCTGATACACCGGGAAAGAAAACTTTCATAATG\n+TAATTGACGTGATCTAACGTTGGTATACCGGTGCTCTCCAAATCTGCTGT\n+TAGATCGGTCATGTCCGTTTGGAGCTCAGCAAATGCCTGTTTACACTCAG\n+AACGAACGTTGCTTTCCAATGTTATCATCTGTATCTGAATTCGTTTGTAT\n+TCCCGTTCTGCTTGGGTCGATTTCCTTCTAAATATTATTAGTACGACAAC\n+CAAAACGATGACAAGCACTGCAACTGTCAATATAACGACAAACATGGCAT\n+GTGAAAAAACATAGGGTTTATTTAAATCATATTTCAAATATCCTATGGCG\n+AAAC
GAAGATTTCGCCCTACTTTAACCACAACTAGAGGTAAATCTGTCGA\n+TTGATCCACACCATTTTCATCAGTTGGGAGTGGTTGATGTTCCGGTGGAA\n+TGCACAAAAGTTGAGTTAGTGTAAGGCTTGTTATATTGCATTGAGTAGTA\n+CCAATGGTTACATTAACGTCGTATTCATCAGCTGCCAAATTTAGTAGCTC\n+GCCTTCAATAACCAAGCTGTCACCCTTGTATAGTTTAATTCCGTCATTTG\n+GAAATGGTAAATATTTTGGATCGGCTAAATAAACAATAGTGCTCCTGATA\n+TCATGAAAGTATTTATTTAGATCTCGAACAAGCTGTACATTGTCCATTAC\n+AAAACTAAGTTGTAAATTCAGTTGCGTTTCATGGACCTTTACAAAAGTTG\n+TAACATCCATATTATTATTGACGAAATAGCTAGGCGCAGTTCCCCCTGTT\n+GTGTATATATGAAAGTTGTCTACGAAGTCTGCTTTTCGCTTTCGTCTAGA\n+ATATTCGTTCTGATGCGCACTTTCATTTTTAAAGTCAAATTCCGAATCCA\n+TCACTCTGTTAGAACTTTTAAATGCTTGAAATTTATCATTTACCGGAGGA\n+GATGGGCATTCCATTTGATTTGAATTGATTACTACGCAAGATGTTTTGTT\n+TACTCTTTCATTATCAAAAAAAACTTCAAGTTCAGGCTTTTGAATTGAAT\n+TAAGATACATTCCATGAACGGTCAGAACGCGACCACCACTTACAAAACTG\n+CGTAAAGGCTTAATCTGCATTATACGCGGGTCTTGGGTATAGTTAAAAAT\n+AGAACAGGGCTGCCTTGGTAACGTACGAAATTGGTATGAACCGAAATTAC\n+TTCGAGTTAGATTTTTATTGGGTATAGTTGCGGGTGAGATATGACATTCT\n+AAAGTTCGGTTGGCACCGTCAATAAGAAGGTGTAGAGACCGTATTGGTTC\n+TGGCTGGGTCGCCTCTGATGTAGTACTACTAACTTGAGTTGAAGAAGCTT\n+GTGTAACGTTTATATGACACTCGTACTCATCTAAATATGCTCGCATGGTT\n+GATCCAATGTTCAAAAACTTTCCTATTAACGACAATTGTGTTCCTCCCGA\n+CCTGGGCCCAATCGTGGGATATAATCCTGTTAGCAAAACATTCTTAAAGT\n+GAAATTGTACACTAGATTCAGTATACCCTGCATCATTTGCAACCTTTATA\n+GGCGCTGACATTTCATACATCACTGCTCCAGTGCGGCATTCGATCTTTAC\n+AGAAATTTGGTAGTTGACTAGTTCACAAGGCACAGACCCAATAAATATTT\n+TTCCACGCACATCTCCTTCGCGAATGCCCAAATTACTGCCTTCAATTGTA\n+ATAAGGGTACCACCCTCAATAGGTCCAGATAACGGTTTAATAATATCGAT\n+CCTTGGTAAAGGGCACTCGTTTTCTTTAGTTGTTTTTGATCCTGCACTAA\n+TAGCACGATCACTGTCTGCTATGCAAGTCTCGTTATATACACATGAGTTG\n+CTGCACCAAGCACATTGGTATTTTGGATCACGAGTGACACACAAACTGCA\n+ATCAGCATGATCCCGATGTGAGCCCAGTACGTCGCATTTATATAATGTAA\n+CAATCGCCGTGTCCACATAGTGCTGGAAATTCCATGTAATTACAACCTTT\n+GCTTGGTATTCATGTGTATTAGTCTCGTAGAAATAAGGTGTTTTTTCACA\n+AACAACAATCTTGTTTGACTCAATGTGGGCAGGCAATAGCATCTGAGCAG\n+CTTCAATATGAACTGTACACAAGAATCCAGCGTGAGCGCTTTTGGGTTTT\n+GGTAAGTTTTCTATCTCTAAACGAATTTCTTTTGGCACCCGCACCGGTAA\n+AAGAATCGCCGGACGATTGCTTTTTAAATGGGGGCATTGACCA
ACACTAG\n+TTACCGAATTTTCTATATTACGACATTGTATTGATTGATGGACACATTTA\n+TTGTCAAAAATACACC'..b'CCACTGGACCCGTTGGATAGCGTC\n+GGCACTTTTCTGTTTGGTGCCATTCACTGGTCACTAGTAGGACCGCTCTG\n+CCTATTAAAATAGGAGTGCTGCTTAGAATTTATTTCAGCAAGACAGCCGT\n+AAGATATATATGAATAGTATTTAACGGCAGTGGCAGAGGCCTTTACAATA\n+TAAGGCTTTACGGGATTTTAAACAATGAAAAACAAAAATATTTTTATAGA\n+GTCGAGATAAAATACTTTTTACAATAAATACTAAGTATTTTTAAAAGATA\n+AAAATATTTAAAAGTAAATACTGAATATTTTAACAGGTGTTAAGTACTGG\n+GAACTGTGACTAAGGACGGCGGTATATCGCTACCATAATATTTTTAAAGG\n+AACTGAATACTGAAAGATGTGACCAAAATGGTAGAATATCGCTAACGAAA\n+ATCACAGACGCAAATATCGATAGTGGCCTAGAGACCCGATATCGCCGCCG\n+TGATTATCGATGAAACTACAGCTGAGCTGCTTGTTTACATTCTAAAGTTA\n+AAATTTTTAAAAATTTCCAATACAAAAAAAAATTCTTCGTATTAACACGG\n+CTGTTTGCCGGTAAGATCTGAAGTGGATTCAAGTTGCAGCACCCTCAGGA\n+AAAGAATAAATTATTATAATATAATAATATAATATAAAGAATTTAAATTC\n+GAACCAAACGCGGTATGTCACTCTTCATTTAGAGAATTTGTGGCTTTGTT\n+TTGTACAAAAAACTATTCTTTTTTCTTGAAAATTAGCATCAAGAATTTAT\n+TGCCAAAACAGAACGCAATTCTGCTAAATCTGGTTACCCTGCCAAATGGC\n+TGGCCGTTCTGTGCATTCAATAACACAAGCTAACGGCTTACGGCACATTC\n+GGAAAGATCCAAAAGAATTTCTATTCCGGCATTTTTTATGTATTATCGTC\n+CAATTTTTGTATGTGTATTCATTCTTTTGGCTGTCCGTTATTAAGCTTTT\n+CTTATCCACATAAGCTGACCAGCTGCCAAAAAGCCGACCGTTTTGTGCAG\n+CCAATAACCCAAGCTAACTATCAACAGCAATTTCGAAAAATCCAGAAGAA\n+GCTATCCTTCGGCAATTTTTTCTTATATTATAATTTTTTTGTCACAATTT\n+TTTGTCAAAAAATCGACAATAATTAAGTGTTTACATTTTAATGCGATTTA\n+ATTGGAAATTTGAATTCGAATCAAACGCGGTGTCACTCTACATATGGACA\n+ATTTGTGGCTACAAAATTAGTTGAAAGTGCATGTTTTTTTTTAAGTGAAA\n+TAAAACAAAAACAGCAATAACAGGCCAAAATGTTGTTTTATAAATAAATA\n+CTTACATATCCGCATATAAATATGAATGAAAAAAAAAACAGCAAGACTGG\n+GCCAAAATATTGTTCTTTAATATGTAAATAAATACATATCTACATATACA\n+TATGTGGACAAAAGTGTTAAAATTAGTTAATAGTGCATGTTTTTTAAATG\n+AAAAAATAAAACAAAAAAAGCAAGCAAATATAAATTATATTCCACACACA\n+TTTACAATAATTTCAAACTACGCTAAAAATACCCAACAGAAAAAACATAG\n+AAAGGTAAACAAAATACAAGAATATCAAAAAAAATCTAATTTTCACGTTG\n+TTGTGGCCGGTACCACCGGTCATACATACATATAGATACATATTTAGATT\n+CTCACAAAAAGAATAGAAAATTACCGCTAATTTCCTTATATACGTTTTTG\n+TTTTCAGTGTTTTGAAATAGTATTTTTCAAACAAAAAACAATACAAAAGG\n+GTTAAAAAAATTGTTCGATTGGCTTTAACATACT
AATTAAAAAAAATCAC\n+AATTTTTTGGCAAAATATCGAAAAAAATTAAGTGTTTACATTTTTATGCG\n+AATTATTTGGAAATTTAAATAAGAACCCAACGCGGTTTGTCACTCTACAT\n+ATGGACAATTTGTGGCTTTGTTTTGAACAAAAAATGAATTTTTTTTTACT\n+CAAAATTAACAAGAAGAAACAGTAATGCGGGTTTTGATCGCAATTCTGCT\n+AAATCGAATTTCCGTGCCCAAAGGCTGACCTTTTTGTGCAGCAAATAGCC\n+CAAGCTAATAATCTACAGCAATTTCTTAAAGACCGAAAATGTATCAACTT\n+CGGCAAAATATGAAAATTTAACATTTATTTTTCAACATTTTTTGCCAAAT\n+AATCGACAAAAATTTCAGTGTCCCGATTTGGATGCGAATTAATTGAGGAC\n+AATTGTGACTTTGTTTTGTAAAAAAACTATTTTTTTTACTGAAAATTACA\n+ATCAAGAATCTAAGGCCAAATCAGAATATTTTCACTGCGGTTTTTTGATC\n+GCAATTCTGCTAAATCGGGTTCCGTGCCAAAAAGCTGACTATTTTTTTTA\n+CTGAGAATTAGCATCAAGAATTTATTGCCAAAACAGAACGCAATTCTGCT\n+AAATCGGGTTGCCCTGCCAAAAGGCTGGCCGTTTTGCGCATTCAATAACA\n+CAAGCTAACGGTTTACAGCACATTCGGAAAGATCCAAAAGAATTTCTATT\n+TCGGCATTTTTTATGTATTATCGTCTAATTTTGGTATGTGTATTCATTCT\n+TTTGGCTGTCCGTTATTAAGCTGCTTTTCTTATTCACATATTCAATGGCG\n+CAGTACATAAAAAGAATGAACCGTGGTTTTCTTGCCCTCTCCATCTCCCT\n+CTACCACTTCCCCTCTCAACAACTATCTCACGCACTCTGTCCCTTCTCGT\n+CTCTGTCACCCTCTCTTTACCTACCTCACTCTGGCACACTCTCTCTACCT\n+ATCTCACTCTGTCACCTTCTTCTACCTAATTCTCTCTGTCGCGCTCGCTC\n+TCTCAACATGCCTTTCGCACCGTATCTCTTCGTTTCTCTGTATGCTTGCT\n+CTCTTAATTTAAGTTTCTCTTTCTTTGTGTTCAATATCTTTCGCGCTCGT\n+GACTCCTATTGACCGGACTCTGAGCGGTGCTCATTAAGAGTGGAGTTTTC\n+GATGATGGCGGGTGGCGCGAACGAGAATTTTTTACATAAAGGTAAGTTGC\n+CAATTCTTTTTTAAATGTGACATACAGCATGCATATTTTATTTATTGTAA\n+TTATATGTGAAAGAATAAAATTATATGTACTTTTATCTAATCTATTGCAT\n+CTATTTTTTCACAGGTAAACGAGTCACACGAGAAGTAAAACACAAACACA\n+AGGATCAAAATAAAATTCAGGTGAGTGAACGAACGTGTTGAAAGTAGTTA\n+ATAGTGCATGTCTATTTAAGTGTAAGAAACAAAAAAGACACAAATGGGCC\n+AATAATTCGTTTCTTTAATAGATAACTGCCTATATTATGTTCAAACTATG\n+CTAAAAAGACCCAAAAGGAAAACATAAAGTATATACCTTGCAAAATGAAT\n+AAAAAGAAATCATATATATTCATATACGTTTGACCGGTACCACCGGTCAT\n+ACATACATAAACATAAACACAAAAAGACTAGAATATAAATACCATATTTT\n+TGATTGGCTTAAAAATGGCACAAAAAAAAGATAAAAACTAGAGTGTCTAG\n+CTTTAAGTGACAATCGACTCGGAAAATGATCAAGAAATAAATGAAAAATT\n+TGTCTTGCGGCTTTTTGGTCGCAATTCTAAATCGGTTTGTATGTATGTCA\n+CTCTTCATGTATACAATTTGGTGGCTTTGTATTCCACAAAAGTCACAGTA\n+GTCACAAAAGTTTTTTTTAC
TGAAAATTACCATCAAGAGTCTATGGCCAA\n+ATCAGAATATTTTCACTGCGGTTTTTTATTTCAATTCTGCTAAATCGGGT\n+TTCCGTGCCAAAAAGCTTTTGCAGCCAACAACAATATTCAATTTTTTGGC\n+AAAAAATCGACAAAAAAATT\n'
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/tblastN/workflowInputs/tblastn_Dbia3_ci.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tblastN/workflowInputs/tblastn_Dbia3_ci.xml Wed Jul 13 13:36:37 2016 -0400
b
b'@@ -0,0 +1,1174 @@\n+<?xml version="1.0"?>\n+<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">\n+<BlastOutput>\n+  <BlastOutput_program>tblastn</BlastOutput_program>\n+  <BlastOutput_version>TBLASTN 2.3.0+</BlastOutput_version>\n+  <BlastOutput_reference>Stephen F. Altschul, Thomas L. Madden, Alejandro A. Sch&amp;auml;ffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), &quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>\n+  <BlastOutput_db>dbia3.fasta</BlastOutput_db>\n+  <BlastOutput_query-ID>Query_1</BlastOutput_query-ID>\n+  <BlastOutput_query-def>ci-PA FBpp0088245</BlastOutput_query-def>\n+  <BlastOutput_query-len>1397</BlastOutput_query-len>\n+  <BlastOutput_param>\n+    <Parameters>\n+      <Parameters_matrix>BLOSUM62</Parameters_matrix>\n+      <Parameters_expect>0.01</Parameters_expect>\n+      <Parameters_gap-open>11</Parameters_gap-open>\n+      <Parameters_gap-extend>1</Parameters_gap-extend>\n+      <Parameters_filter>L;</Parameters_filter>\n+    </Parameters>\n+  </BlastOutput_param>\n+<BlastOutput_iterations>\n+<Iteration>\n+  <Iteration_iter-num>1</Iteration_iter-num>\n+  <Iteration_query-ID>Query_1</Iteration_query-ID>\n+  <Iteration_query-def>ci-PA FBpp0088245</Iteration_query-def>\n+  <Iteration_query-len>1397</Iteration_query-len>\n+<Iteration_hits>\n+<Hit>\n+  <Hit_num>1</Hit_num>\n+  <Hit_id>gnl|BL_ORD_ID|1</Hit_id>\n+  <Hit_def>contig2</Hit_def>\n+  <Hit_accession>1</Hit_accession>\n+  <Hit_len>45017</Hit_len>\n+  <Hit_hsps>\n+    <Hsp>\n+      <Hsp_num>1</Hsp_num>\n+      <Hsp_bit-score>852.818</Hsp_bit-score>\n+      <Hsp_score>2202</Hsp_score>\n+      <Hsp_evalue>0</Hsp_evalue>\n+      <Hsp_query-from>576</Hsp_query-from>\n+      <Hsp_query-to>1396</Hsp_query-to>\n+      <Hsp_hit-from>16200</Hsp_hit-from>\n+      <Hsp_hit-to>18659</Hsp_hit-to>\n+      
<Hsp_query-frame>0</Hsp_query-frame>\n+      <Hsp_hit-frame>-1</Hsp_hit-frame>\n+      <Hsp_identity>540</Hsp_identity>\n+      <Hsp_positive>627</Hsp_positive>\n+      <Hsp_gaps>39</Hsp_gaps>\n+      <Hsp_align-len>840</Hsp_align-len>\n+      <Hsp_qseq>KPYICKAPGCTKRYTDPSSLRKHVKTVHGAEFYANKKHKGLPLNDANSRLQQNNS--RHNLQEHNIDSSPCSEDSHLGKMLGTXXXXXXXXXXXXXXNHHLVNGVRASDSLLTYSPDDLAEXXXXXXXXXXXXXXXXXXXXXXXRAMVNIGNGNASASTIGGSVLARQRFRGRLQTKGINSSTIMLCNIPESNRTFGISELNQRITELKMEPGTDAEIKIPKLPNTTIGGYTEDPLQNQTSFRNTVSNKQG--TVSGSIQGQFRRDSQNSTASTYYGXXXXXXXXXXXXXXXIPTMRPNPSCN-STASFYDPISPGCSRRSSQMSNGANCNSFTSTSGLPVLNKE--SNKSLNACINKPNIGVQGVGIYNXXXXXXXXXHLIATNLKRLQRKDSE--YHNFTSGRFSVPSYMHSLHIKNNKPVGENEFDKAIASNA-RRQTDPVPNINLDPLTNISRFSTTPHSFDINVGKTN--NIASSINKDNLRKDLFTVSIKADMAMTSDQHPNERINLDEVEELILPDEMLQYLNLVKDDTNHLEKEHQAVPVGSNVSETIASNH--YREQSNIYYTNKQILTPPSNVDI----QPNTTKFTVQDKFAMTAVGGSFSQRELSTLAVPNEHGHAKCESFHHQSQKYMNTDIGSKQQSALPSAHQRQTEKSNYNQIIDSSMTSLPELNVDSIYPRNETENIFKVHGDHDNEIQCGIISQSQMSPSTNLNNDGQFSTVNMQPITTSKLF-PPEPQKIVCDTQASNTSVMHLDTYQRTLEYVQSCQNWMETNNTSTNQIQSLPGMPVNNTLFPDVSSSTHPYHGTNMVINDMTTSLTSLLEENRYLQMM</Hsp_qseq>\n+      <Hsp_hseq>KPYICKAPGCTKRYTDPSSLRKHVKTVHGAEFYANKKHKGLPLNDANSRLHRDSGQGRHNLQEHNIDSSPCSEELQVGKLIGMSSPSIKSESDESSPHHQLLSGVRASDCFLTYSQDGAAEHITLDDGWDCDDDVDVADLPIVLRAMVNVGSGNSMAPTIGGGVVARQRFRSRLQTKGINSSMVMLGNIPESNRTIGISELNQRITELKMEPGTACDITVPL--NTGLERISEDLSQNQS---NITLNKQSFFTASGSLQGHFRRDSQNSTASTYYGSMQSRRSSQSSQVSSISTMRPGPSYNTTTASLYDPISPGCSRRSSQMSNVVNSYALTSTSGLTAINKDLNANSSPNASINKPGLGGQYFGFYNNSLPPPPSSHLIATNLKHLQDTDSRSCYHNTTGGRFSIPNCTPSLHLDYNGPAGEQEIDKEIPNNILRRQSEPMPNISLDTLTNVSPLSGPLQNLQFPIGKARNVNITSSSNENTLRKGPCHATMKTEMTMTSEQHPNERINLDEVEELILPDEMLQYLNLVKDDQNYMEKDD--VAIRSTVPKTIKSNENLLLSKSNLNPIKKQIILPTSNFDVSINLQPNTSNLQTQEEHTMTTIGGLPSQREQN--IVPHQHEKTKCRSFPQEIDKTINIDIGFKEQPYPSSAYQPQITKSNQNEIIDSSMTSLPELN--PIFTKINSENVSKLHRDQNSEIQCGIVSQSQMSPSININNDGETSTLKNLPLTYSKFSGQPNTQTTVG---GSNTSSMVSDTYQRTLEYVQSCQNWVDTNNSSGDQIQS------NNTLWSDVSSSTHPYAGTNLVINDMTTSLTSLLEENRYLHMM</Hsp_hseq>\n+      
<Hsp_midline>KPYICKAPGCTKRYTDPSSLRKHVKTVHGAEFYANKKHKGLPLNDANSRL +++   R'..b'   <Hsp_midline>H  +K   C  +GC K +     ++ HL +H G + + C    C KAF  +S   +HQ   H+ EKP+ C   GC KR++   +LR</Hsp_midline>\n+    </Hsp>\n+    <Hsp>\n+      <Hsp_num>2</Hsp_num>\n+      <Hsp_bit-score>47.3654</Hsp_bit-score>\n+      <Hsp_score>111</Hsp_score>\n+      <Hsp_evalue>6.65795e-06</Hsp_evalue>\n+      <Hsp_query-from>362</Hsp_query-from>\n+      <Hsp_query-to>441</Hsp_query-to>\n+      <Hsp_hit-from>35535</Hsp_hit-from>\n+      <Hsp_hit-to>35756</Hsp_hit-to>\n+      <Hsp_query-frame>0</Hsp_query-frame>\n+      <Hsp_hit-frame>-1</Hsp_hit-frame>\n+      <Hsp_identity>29</Hsp_identity>\n+      <Hsp_positive>37</Hsp_positive>\n+      <Hsp_gaps>6</Hsp_gaps>\n+      <Hsp_align-len>80</Hsp_align-len>\n+      <Hsp_qseq>NKKAFVCRWEDCTRGEKPFKAQYMLVVHMRRHTGEKPHKCTFEGCFKAYSRLENLKTHLRSHTGEKPYTCEYPGCSKAFS</Hsp_qseq>\n+      <Hsp_hseq>NDKKIACPHKGC---HKNFRDSSAMRKHLHTH-GPRVHVCA--ECGKAFVESSKLKRHQLVHTGEKPFQCTFEGCGKRFS</Hsp_hseq>\n+      <Hsp_midline>N K   C  + C    K F+    +  H+  H G + H C    C KA+     LK H   HTGEKP+ C + GC K FS</Hsp_midline>\n+    </Hsp>\n+    <Hsp>\n+      <Hsp_num>3</Hsp_num>\n+      <Hsp_bit-score>41.5874</Hsp_bit-score>\n+      <Hsp_score>96</Hsp_score>\n+      <Hsp_evalue>0.000398301</Hsp_evalue>\n+      <Hsp_query-from>384</Hsp_query-from>\n+      <Hsp_query-to>427</Hsp_query-to>\n+      <Hsp_hit-from>33181</Hsp_hit-from>\n+      <Hsp_hit-to>33312</Hsp_hit-to>\n+      <Hsp_query-frame>0</Hsp_query-frame>\n+      <Hsp_hit-frame>-3</Hsp_hit-frame>\n+      <Hsp_identity>16</Hsp_identity>\n+      <Hsp_positive>28</Hsp_positive>\n+      <Hsp_gaps>0</Hsp_gaps>\n+      <Hsp_align-len>44</Hsp_align-len>\n+      <Hsp_qseq>YMLVVHMRRHTGEKPHKCTFEGCFKAYSRLENLKTHLRSHTGEK</Hsp_qseq>\n+      <Hsp_hseq>FIYRTHVRIHTGDRPFVCPFDACNKKFAQSTNLKSHILTHAKAK</Hsp_hseq>\n+      <Hsp_midline>++   H+R HTG++P  C F+ C K +++  NLK+H+ +H   K</Hsp_midline>\n+    </Hsp>\n+    <Hsp>\n+      <Hsp_num>4</Hsp_num>\n+      
<Hsp_bit-score>40.817</Hsp_bit-score>\n+      <Hsp_score>94</Hsp_score>\n+      <Hsp_evalue>0.000632218</Hsp_evalue>\n+      <Hsp_query-from>369</Hsp_query-from>\n+      <Hsp_query-to>431</Hsp_query-to>\n+      <Hsp_hit-from>35475</Hsp_hit-from>\n+      <Hsp_hit-to>35663</Hsp_hit-to>\n+      <Hsp_query-frame>0</Hsp_query-frame>\n+      <Hsp_hit-frame>-1</Hsp_hit-frame>\n+      <Hsp_identity>23</Hsp_identity>\n+      <Hsp_positive>28</Hsp_positive>\n+      <Hsp_gaps>0</Hsp_gaps>\n+      <Hsp_align-len>63</Hsp_align-len>\n+      <Hsp_qseq>RWEDCTRGEKPFKAQYMLVVHMRRHTGEKPHKCTFEGCFKAYSRLENLKTHLRSHTGEKPYTC</Hsp_qseq>\n+      <Hsp_hseq>RVHVCAECGKAFVESSKLKRHQLVHTGEKPFQCTFEGCGKRFSLDFNLRYSTKKFWFSYKFVC</Hsp_hseq>\n+      <Hsp_midline>R   C    K F     L  H   HTGEKP +CTFEGC K +S   NL+   +       + C</Hsp_midline>\n+    </Hsp>\n+    <Hsp>\n+      <Hsp_num>5</Hsp_num>\n+      <Hsp_bit-score>37.7354</Hsp_bit-score>\n+      <Hsp_score>86</Hsp_score>\n+      <Hsp_evalue>0.00636923</Hsp_evalue>\n+      <Hsp_query-from>417</Hsp_query-from>\n+      <Hsp_query-to>449</Hsp_query-to>\n+      <Hsp_hit-from>33205</Hsp_hit-from>\n+      <Hsp_hit-to>33303</Hsp_hit-to>\n+      <Hsp_query-frame>0</Hsp_query-frame>\n+      <Hsp_hit-frame>-3</Hsp_hit-frame>\n+      <Hsp_identity>12</Hsp_identity>\n+      <Hsp_positive>23</Hsp_positive>\n+      <Hsp_gaps>0</Hsp_gaps>\n+      <Hsp_align-len>33</Hsp_align-len>\n+      <Hsp_qseq>KTHLRSHTGEKPYTCEYPGCSKAFSNASDRAKH</Hsp_qseq>\n+      <Hsp_hseq>RTHVRIHTGDRPFVCPFDACNKKFAQSTNLKSH</Hsp_hseq>\n+      <Hsp_midline>+TH+R HTG++P+ C +  C+K F+ +++   H</Hsp_midline>\n+    </Hsp>\n+  </Hit_hsps>\n+</Hit>\n+</Iteration_hits>\n+  <Iteration_stat>\n+    <Statistics>\n+      <Statistics_db-num>70</Statistics_db-num>\n+      <Statistics_db-len>3333194</Statistics_db-len>\n+      <Statistics_hsp-len>96</Statistics_hsp-len>\n+      <Statistics_eff-space>1306438952</Statistics_eff-space>\n+      <Statistics_kappa>0.041</Statistics_kappa>\n+      
<Statistics_lambda>0.267</Statistics_lambda>\n+      <Statistics_entropy>0.14</Statistics_entropy>\n+    </Statistics>\n+  </Iteration_stat>\n+</Iteration>\n+</BlastOutput_iterations>\n+</BlastOutput>\n+\n'
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/tblastN/workflowInputs/tblastn_Dbia3_ci.xml.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tblastN/workflowInputs/tblastn_Dbia3_ci.xml.bed Wed Jul 13 13:36:37 2016 -0400
b
@@ -0,0 +1,50 @@
+contig2 16199 18659 ci-PA 358 - 16199 18659 0 18 126,114,27,174,186,84,69,45,204,108,117,144,123,150,24,48,495,165, 0,126,240,270,444,630,726,801,846,1056,1167,1290,1440,1566,1722,1746,1794,2295,
+contig2 18939 20598 ci-PA 604 - 18939 20598 0 7 510,210,432,84,63,45,174, 0,564,774,1278,1368,1434,1485,
+contig2 18712 18877 ci-PA 964 - 18712 18877 0 1 165, 0,
+contig1 31366 33826 ci-PA 358 - 31366 33826 0 18 126,114,27,174,186,84,69,45,204,108,117,144,123,150,24,48,495,165, 0,126,240,270,444,630,726,801,846,1056,1167,1290,1440,1566,1722,1746,1794,2295,
+contig1 34106 35765 ci-PA 604 - 34106 35765 0 7 510,210,432,84,63,45,174, 0,564,774,1278,1368,1434,1485,
+contig1 33879 34044 ci-PA 964 - 33879 34044 0 1 165, 0,
+contig24 12402 12561 ci-PA 0 - 12402 12561 0 2 63,96, 0,63,
+contig24 12402 12570 ci-PA 0 - 12402 12570 0 4 21,42,90,15, 0,21,63,153,
+contig24 3846 4032 ci-PA 0 - 3846 4032 0 3 51,21,96, 0,51,90,
+contig23 32405 32564 ci-PA 0 - 32405 32564 0 2 63,96, 0,63,
+contig23 32405 32573 ci-PA 0 - 32405 32573 0 4 21,42,90,15, 0,21,63,153,
+contig23 23849 24035 ci-PA 0 - 23849 24035 0 3 51,21,96, 0,51,90,
+contig67 20513 20759 ci-PA 0 - 20513 20759 0 4 81,51,21,93, 0,81,132,153,
+contig67 20531 20753 ci-PA 0 - 20531 20753 0 4 111,24,51,36, 0,111,135,186,
+contig67 18177 18309 ci-PA 0 - 18177 18309 0 1 132, 0,
+contig67 20471 20660 ci-PA 0 - 20471 20660 0 1 189, 0,
+contig67 18201 18300 ci-PA 0 - 18201 18300 0 1 99, 0,
+contig66 35516 35762 ci-PA 0 - 35516 35762 0 4 81,51,21,93, 0,81,132,153,
+contig66 35534 35756 ci-PA 0 - 35534 35756 0 4 111,24,51,36, 0,111,135,186,
+contig66 33180 33312 ci-PA 0 - 33180 33312 0 1 132, 0,
+contig66 35474 35663 ci-PA 0 - 35474 35663 0 1 189, 0,
+contig66 33204 33303 ci-PA 0 - 33204 33303 0 1 99, 0,
+contig2 18711 20598 ci-PC 584 - 18711 20598 0 8 99,510,210,432,84,63,48,171, 0,228,792,1002,1506,1596,1662,1716,
+contig2 18601 18889 ci-PC 0 - 18601 18889 0 5 63,36,51,54,45, 0,75,123,180,243,
+contig1 33878 35765 ci-PC 584 - 33878 35765 0 8 99,510,210,432,84,63,48,171, 0,228,792,1002,1506,1596,1662,1716,
+contig1 33768 34056 ci-PC 0 - 33768 34056 0 5 63,36,51,54,45, 0,75,123,180,243,
+contig24 12468 12561 ci-PC 0 - 12468 12561 0 1 93, 0,
+contig23 32471 32564 ci-PC 0 - 32471 32564 0 1 93, 0,
+contig2 16199 18659 ci-PB 358 - 16199 18659 0 18 126,114,27,174,186,84,69,45,204,108,117,144,123,150,24,48,495,165, 0,126,240,270,444,630,726,801,846,1056,1167,1290,1440,1566,1722,1746,1794,2295,
+contig2 18939 20268 ci-PB 560 - 18939 20268 0 4 510,210,432,51, 0,564,774,1278,
+contig2 18712 18877 ci-PB 964 - 18712 18877 0 1 165, 0,
+contig1 31366 33826 ci-PB 358 - 31366 33826 0 18 126,114,27,174,186,84,69,45,204,108,117,144,123,150,24,48,495,165, 0,126,240,270,444,630,726,801,846,1056,1167,1290,1440,1566,1722,1746,1794,2295,
+contig1 34106 35435 ci-PB 560 - 34106 35435 0 4 510,210,432,51, 0,564,774,1278,
+contig1 33879 34044 ci-PB 964 - 33879 34044 0 1 165, 0,
+contig24 12402 12561 ci-PB 0 - 12402 12561 0 2 63,96, 0,63,
+contig24 12402 12570 ci-PB 0 - 12402 12570 0 4 21,42,90,15, 0,21,63,153,
+contig24 3846 4032 ci-PB 0 - 3846 4032 0 3 51,21,96, 0,51,90,
+contig23 32405 32564 ci-PB 0 - 32405 32564 0 2 63,96, 0,63,
+contig23 32405 32573 ci-PB 0 - 32405 32573 0 4 21,42,90,15, 0,21,63,153,
+contig23 23849 24035 ci-PB 0 - 23849 24035 0 3 51,21,96, 0,51,90,
+contig67 20513 20759 ci-PB 0 - 20513 20759 0 4 81,51,21,93, 0,81,132,153,
+contig67 20531 20753 ci-PB 0 - 20531 20753 0 4 111,24,51,36, 0,111,135,186,
+contig67 18177 18309 ci-PB 0 - 18177 18309 0 1 132, 0,
+contig67 20471 20660 ci-PB 0 - 20471 20660 0 1 189, 0,
+contig67 18201 18300 ci-PB 0 - 18201 18300 0 1 99, 0,
+contig66 35516 35762 ci-PB 0 - 35516 35762 0 4 81,51,21,93, 0,81,132,153,
+contig66 35534 35756 ci-PB 0 - 35534 35756 0 4 111,24,51,36, 0,111,135,186,
+contig66 33180 33312 ci-PB 0 - 33180 33312 0 1 132, 0,
+contig66 35474 35663 ci-PB 0 - 35474 35663 0 1 189, 0,
+contig66 33204 33303 ci-PB 0 - 33204 33303 0 1 99, 0,
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/tblastN/workflowInputs/tblastn_Dbia3_ci.xml.psl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tblastN/workflowInputs/tblastn_Dbia3_ci.xml.psl Wed Jul 13 13:36:37 2016 -0400
b
@@ -0,0 +1,50 @@
+1458 762 183 0 7 60 10 57 +- ci-PA 4191 1725 4188 contig2 45017 16199 18659 18 165,495,48,24,150,123,144,117,108,204,45,69,84,186,174,27,114,126, 1725,1890,2391,2448,2472,2622,2745,2889,3006,3114,3324,3369,3438,3528,3720,3894,3930,4062, 26358,26529,27024,27072,27102,27255,27384,27534,27654,27768,27972,28023,28104,28188,28374,28551,28578,28692,
+987 300 231 0 1 3 5 141 +- ci-PA 4191 39 1560 contig2 45017 18939 20598 7 174,45,63,84,432,210,510, 39,213,258,321,405,840,1050, 24419,24599,24647,24716,24872,25304,25568,
+162 3 0 0 0 0 0 0 +- ci-PA 4191 1560 1725 contig2 45017 18712 18877 1 165, 1560, 26140,
+1458 762 183 0 7 60 10 57 +- ci-PA 4191 1725 4188 contig1 45179 31366 33826 18 165,495,48,24,150,123,144,117,108,204,45,69,84,186,174,27,114,126, 1725,1890,2391,2448,2472,2622,2745,2889,3006,3114,3324,3369,3438,3528,3720,3894,3930,4062, 11353,11524,12019,12067,12097,12250,12379,12529,12649,12763,12967,13018,13099,13183,13369,13546,13573,13687,
+987 300 231 0 1 3 5 141 +- ci-PA 4191 39 1560 contig1 45179 34106 35765 7 174,45,63,84,432,210,510, 39,213,258,321,405,840,1050, 9414,9594,9642,9711,9867,10299,10563,
+162 3 0 0 0 0 0 0 +- ci-PA 4191 1560 1725 contig1 45179 33879 34044 1 165, 1560, 11135,
+69 90 0 0 1 6 0 0 +- ci-PA 4191 1467 1632 contig24 40010 12402 12561 2 96,63, 1467,1569, 27449,27545,
+78 90 0 0 3 15 0 0 +- ci-PA 4191 1542 1725 contig24 40010 12402 12570 4 15,90,42,21, 1542,1563,1659,1704, 27440,27455,27545,27587,
+63 105 0 0 1 6 1 18 +- ci-PA 4191 1536 1710 contig24 40010 3846 4032 3 96,21,51, 1536,1632,1659, 35978,36092,36113,
+69 90 0 0 1 6 0 0 +- ci-PA 4191 1467 1632 contig23 50012 32405 32564 2 96,63, 1467,1569, 17448,17544,
+78 90 0 0 3 15 0 0 +- ci-PA 4191 1542 1725 contig23 50012 32405 32573 4 15,90,42,21, 1542,1563,1659,1704, 17439,17454,17544,17586,
+63 105 0 0 1 6 1 18 +- ci-PA 4191 1536 1710 contig23 50012 23849 24035 3 96,21,51, 1536,1632,1659, 25977,26091,26112,
+87 159 0 0 3 12 0 0 +- ci-PA 4191 1530 1788 contig67 44531 20513 20759 4 93,21,51,81, 1530,1626,1653,1707, 23772,23865,23886,23937,
+87 135 0 0 3 18 0 0 +- ci-PA 4191 1437 1677 contig67 44531 20531 20753 4 36,51,24,111, 1437,1482,1536,1566, 23778,23814,23865,23889,
+48 84 0 0 0 0 0 0 +- ci-PA 4191 1503 1635 contig67 44531 18177 18309 1 132, 1503, 26222,
+69 120 0 0 0 0 0 0 +- ci-PA 4191 1458 1647 contig67 44531 20471 20660 1 189, 1458, 23871,
+36 63 0 0 0 0 0 0 +- ci-PA 4191 1602 1701 contig67 44531 18201 18300 1 99, 1602, 26231,
+87 159 0 0 3 12 0 0 +- ci-PA 4191 1530 1788 contig66 45011 35516 35762 4 93,21,51,81, 1530,1626,1653,1707, 9249,9342,9363,9414,
+87 135 0 0 3 18 0 0 +- ci-PA 4191 1437 1677 contig66 45011 35534 35756 4 36,51,24,111, 1437,1482,1536,1566, 9255,9291,9342,9366,
+48 84 0 0 0 0 0 0 +- ci-PA 4191 1503 1635 contig66 45011 33180 33312 1 132, 1503, 11699,
+69 120 0 0 0 0 0 0 +- ci-PA 4191 1458 1647 contig66 45011 35474 35663 1 189, 1458, 9348,
+36 63 0 0 0 0 0 0 +- ci-PA 4191 1602 1701 contig66 45011 33204 33303 1 99, 1602, 11708,
+1050 336 231 0 1 3 6 270 +- ci-PC 1716 39 1659 contig2 45017 18711 20598 8 171,48,63,84,432,210,510,99, 39,210,258,321,405,840,1050,1560, 24419,24596,24647,24716,24872,25304,25568,26207,
+111 138 0 0 0 0 4 39 +- ci-PC 1716 1467 1716 contig2 45017 18601 18889 5 45,54,51,36,63, 1467,1512,1566,1617,1653, 26128,26182,26242,26305,26353,
+1050 336 231 0 1 3 6 270 +- ci-PC 1716 39 1659 contig1 45179 33878 35765 8 171,48,63,84,432,210,510,99, 39,210,258,321,405,840,1050,1560, 9414,9591,9642,9711,9867,10299,10563,11202,
+111 138 0 0 0 0 4 39 +- ci-PC 1716 1467 1716 contig1 45179 33768 34056 5 45,54,51,36,63, 1467,1512,1566,1617,1653, 11123,11177,11237,11300,11348,
+45 48 0 0 0 0 0 0 +- ci-PC 1716 1467 1560 contig24 40010 12468 12561 1 93, 1467, 27449,
+45 48 0 0 0 0 0 0 +- ci-PC 1716 1467 1560 contig23 50012 32471 32564 1 93, 1467, 17448,
+1458 762 183 0 7 60 10 57 +- ci-PB 3837 1371 3834 contig2 45017 16199 18659 18 165,495,48,24,150,123,144,117,108,204,45,69,84,186,174,27,114,126, 1371,1536,2037,2094,2118,2268,2391,2535,2652,2760,2970,3015,3084,3174,3366,3540,3576,3708, 26358,26529,27024,27072,27102,27255,27384,27534,27654,27768,27972,28023,28104,28188,28374,28551,28578,28692,
+834 264 105 0 1 3 2 126 +- ci-PB 3837 0 1206 contig2 45017 18939 20268 4 51,432,210,510, 0,51,486,696, 24749,24872,25304,25568,
+162 3 0 0 0 0 0 0 +- ci-PB 3837 1206 1371 contig2 45017 18712 18877 1 165, 1206, 26140,
+1458 762 183 0 7 60 10 57 +- ci-PB 3837 1371 3834 contig1 45179 31366 33826 18 165,495,48,24,150,123,144,117,108,204,45,69,84,186,174,27,114,126, 1371,1536,2037,2094,2118,2268,2391,2535,2652,2760,2970,3015,3084,3174,3366,3540,3576,3708, 11353,11524,12019,12067,12097,12250,12379,12529,12649,12763,12967,13018,13099,13183,13369,13546,13573,13687,
+834 264 105 0 1 3 2 126 +- ci-PB 3837 0 1206 contig1 45179 34106 35435 4 51,432,210,510, 0,51,486,696, 9744,9867,10299,10563,
+162 3 0 0 0 0 0 0 +- ci-PB 3837 1206 1371 contig1 45179 33879 34044 1 165, 1206, 11135,
+69 90 0 0 1 6 0 0 +- ci-PB 3837 1113 1278 contig24 40010 12402 12561 2 96,63, 1113,1215, 27449,27545,
+78 90 0 0 3 15 0 0 +- ci-PB 3837 1188 1371 contig24 40010 12402 12570 4 15,90,42,21, 1188,1209,1305,1350, 27440,27455,27545,27587,
+63 105 0 0 1 6 1 18 +- ci-PB 3837 1182 1356 contig24 40010 3846 4032 3 96,21,51, 1182,1278,1305, 35978,36092,36113,
+69 90 0 0 1 6 0 0 +- ci-PB 3837 1113 1278 contig23 50012 32405 32564 2 96,63, 1113,1215, 17448,17544,
+78 90 0 0 3 15 0 0 +- ci-PB 3837 1188 1371 contig23 50012 32405 32573 4 15,90,42,21, 1188,1209,1305,1350, 17439,17454,17544,17586,
+63 105 0 0 1 6 1 18 +- ci-PB 3837 1182 1356 contig23 50012 23849 24035 3 96,21,51, 1182,1278,1305, 25977,26091,26112,
+87 159 0 0 3 12 0 0 +- ci-PB 3837 1176 1434 contig67 44531 20513 20759 4 93,21,51,81, 1176,1272,1299,1353, 23772,23865,23886,23937,
+87 135 0 0 3 18 0 0 +- ci-PB 3837 1083 1323 contig67 44531 20531 20753 4 36,51,24,111, 1083,1128,1182,1212, 23778,23814,23865,23889,
+48 84 0 0 0 0 0 0 +- ci-PB 3837 1149 1281 contig67 44531 18177 18309 1 132, 1149, 26222,
+69 120 0 0 0 0 0 0 +- ci-PB 3837 1104 1293 contig67 44531 20471 20660 1 189, 1104, 23871,
+36 63 0 0 0 0 0 0 +- ci-PB 3837 1248 1347 contig67 44531 18201 18300 1 99, 1248, 26231,
+87 159 0 0 3 12 0 0 +- ci-PB 3837 1176 1434 contig66 45011 35516 35762 4 93,21,51,81, 1176,1272,1299,1353, 9249,9342,9363,9414,
+87 135 0 0 3 18 0 0 +- ci-PB 3837 1083 1323 contig66 45011 35534 35756 4 36,51,24,111, 1083,1128,1182,1212, 9255,9291,9342,9366,
+48 84 0 0 0 0 0 0 +- ci-PB 3837 1149 1281 contig66 45011 33180 33312 1 132, 1149, 11699,
+69 120 0 0 0 0 0 0 +- ci-PB 3837 1104 1293 contig66 45011 35474 35663 1 189, 1104, 9348,
+36 63 0 0 0 0 0 0 +- ci-PB 3837 1248 1347 contig66 45011 33204 33303 1 99, 1248, 11708,
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/trfBig/hubaInputs/dbia3.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/trfBig/hubaInputs/dbia3.fa Wed Jul 13 13:36:37 2016 -0400
b
b'@@ -0,0 +1,66784 @@\n+>contig1\n+AAAACTAATTTTATCAAAATCGGACAACTATATCATATAGCTGCCATACG\n+AACGATCGGAAAATTGGTAAGTAAATAATTAAAAATATTATATCTTTGGT\n+GTGTTTTTGGAAATAACATTTTTTAATATGTTTTGAATTTTGAATTAAAT\n+TCTATCAAAATCGGACGACTATATCATATAGCTGCCATACAAACGATCGG\n+AAAATTGGTGAATAAATAATATGAAAAAATTATATCTTTGGTTATTTTTT\n+AGACAAATAACCTCCAACGCTTGGAAATAACATTTTTTAATTAGTTATAA\n+ATTTGATATTTTAATTTTATCAAAATCGGACGAATATAGCATATAGCTGT\n+TAAAATAATATGAAACAAATTATAGCTCCGGTGTTTTTTACATATTATCT\n+TATACTATTGGGAAAATAGTTTATTATATTTTAAAGAATTTCCAATTAAA\n+CTCTAACATATAGCTTTCAAAGAAACGGTCAAAAAAGTAAAGAAATCATT\n+TTTTTTTAACATCACTGAAGCTAGAAACAATCCTTAAAAATGTAACATGG\n+TGTTAGTAGCATTGAAAATTGCTTATAACTGCAAAGGGAAAACAAACATC\n+GGCTTGCCGAATGTAATTTCCATTCTTGTTTGACTTGAGTTTATAACTTA\n+CAATTATGGTAAGGTGCCTGATTTTGGTTTTTGCCATACGGTATGTGGTA\n+GTTATCATTTTGCCGTGCTATAAGTGACATCACAGCCGATTCTTTTACTC\n+CATAATGGGCCAATGTGTTGAGGCGTTTCCATCCATTTATTGTTTTTGTA\n+GTTAGGTCTTCATCTTGTAATGTGAGATGACCTCCTCTTCCATGCCTCCA\n+TTCTAAGTCAACTTCGTGCACAGATGGTCTCATAGAAAACGGAGTATTTT\n+TAAAAATTGCGTCCAAAATTTTTAATTTAACTTGCGATATGGTATCCAAA\n+TCGTTTACACGACATTGGACTTTTTCATCAAGATCATCTTGTAAAATGTG\n+CAAAATTACAACAGAATGGGTAACTTGTTCGTGGAGAAGTCGTTCCTCTG\n+AAAGTGAATAACGAGCATCGTGGGTTATTGCGTCCACCAAACCCTTTTCA\n+ATTTGATGCTTAATTGCCTTGAACAGCAAAAATAAATTAGATCCAGCATA\n+TTCTTTTAGGTAGTCGTACATACAAATTGCTAAGTAGTTTGTTAACATTT\n+TTTCAACTACGCTCTCAGTGCGTCGTAGCATTAGCTGAGGATGCTTGCTG\n+GCGAGCGATTTGTCAATTAATCGCAATAAAAGGGACTTTAAAATTTCCGT\n+CGCATATTCCATTTTGTTCATTAGAACAACCATAAGTAAAGAGGCGACGT\n+TAACTCGATCGCGAATTGAGAAAGATGACCGTTGAGCTTCTAAAGTTTCT\n+ATGAACAATAGTAAAAAATATTTGTTTCCAATAAGTTGCTCGAATTGTAT\n+CATAGCTGCATCATAGTTAGTGTGCGGGCTACTTCCACAAAATTTTCGGG\n+AGTTTAGAATAGGATGATCTGATACACCGGGAAAGAAAACTTTCATAATG\n+TAATTGACGTGATCTAACGTTGGTATACCGGTGCTCTCCAAATCTGCTGT\n+TAGATCGGTCATGTCCGTTTGGAGCTCAGCAAATGCCTGTTTACACTCAG\n+AACGAACGTTGCTTTCCAATGTTATCATCTGTATCTGAATTCGTTTGTAT\n+TCCCGTTCTGCTTGGGTCGATTTCCTTCTAAATATTATTAGTACGACAAC\n+CAAAACGATGACAAGCACTGCAACTGTCAATATAACGACAAACATGGCAT\n+GTGAAAAAACATAGGGTTTATTTAAATCATATTTCAAATATCCTATGGCG\n+AAAC
GAAGATTTCGCCCTACTTTAACCACAACTAGAGGTAAATCTGTCGA\n+TTGATCCACACCATTTTCATCAGTTGGGAGTGGTTGATGTTCCGGTGGAA\n+TGCACAAAAGTTGAGTTAGTGTAAGGCTTGTTATATTGCATTGAGTAGTA\n+CCAATGGTTACATTAACGTCGTATTCATCAGCTGCCAAATTTAGTAGCTC\n+GCCTTCAATAACCAAGCTGTCACCCTTGTATAGTTTAATTCCGTCATTTG\n+GAAATGGTAAATATTTTGGATCGGCTAAATAAACAATAGTGCTCCTGATA\n+TCATGAAAGTATTTATTTAGATCTCGAACAAGCTGTACATTGTCCATTAC\n+AAAACTAAGTTGTAAATTCAGTTGCGTTTCATGGACCTTTACAAAAGTTG\n+TAACATCCATATTATTATTGACGAAATAGCTAGGCGCAGTTCCCCCTGTT\n+GTGTATATATGAAAGTTGTCTACGAAGTCTGCTTTTCGCTTTCGTCTAGA\n+ATATTCGTTCTGATGCGCACTTTCATTTTTAAAGTCAAATTCCGAATCCA\n+TCACTCTGTTAGAACTTTTAAATGCTTGAAATTTATCATTTACCGGAGGA\n+GATGGGCATTCCATTTGATTTGAATTGATTACTACGCAAGATGTTTTGTT\n+TACTCTTTCATTATCAAAAAAAACTTCAAGTTCAGGCTTTTGAATTGAAT\n+TAAGATACATTCCATGAACGGTCAGAACGCGACCACCACTTACAAAACTG\n+CGTAAAGGCTTAATCTGCATTATACGCGGGTCTTGGGTATAGTTAAAAAT\n+AGAACAGGGCTGCCTTGGTAACGTACGAAATTGGTATGAACCGAAATTAC\n+TTCGAGTTAGATTTTTATTGGGTATAGTTGCGGGTGAGATATGACATTCT\n+AAAGTTCGGTTGGCACCGTCAATAAGAAGGTGTAGAGACCGTATTGGTTC\n+TGGCTGGGTCGCCTCTGATGTAGTACTACTAACTTGAGTTGAAGAAGCTT\n+GTGTAACGTTTATATGACACTCGTACTCATCTAAATATGCTCGCATGGTT\n+GATCCAATGTTCAAAAACTTTCCTATTAACGACAATTGTGTTCCTCCCGA\n+CCTGGGCCCAATCGTGGGATATAATCCTGTTAGCAAAACATTCTTAAAGT\n+GAAATTGTACACTAGATTCAGTATACCCTGCATCATTTGCAACCTTTATA\n+GGCGCTGACATTTCATACATCACTGCTCCAGTGCGGCATTCGATCTTTAC\n+AGAAATTTGGTAGTTGACTAGTTCACAAGGCACAGACCCAATAAATATTT\n+TTCCACGCACATCTCCTTCGCGAATGCCCAAATTACTGCCTTCAATTGTA\n+ATAAGGGTACCACCCTCAATAGGTCCAGATAACGGTTTAATAATATCGAT\n+CCTTGGTAAAGGGCACTCGTTTTCTTTAGTTGTTTTTGATCCTGCACTAA\n+TAGCACGATCACTGTCTGCTATGCAAGTCTCGTTATATACACATGAGTTG\n+CTGCACCAAGCACATTGGTATTTTGGATCACGAGTGACACACAAACTGCA\n+ATCAGCATGATCCCGATGTGAGCCCAGTACGTCGCATTTATATAATGTAA\n+CAATCGCCGTGTCCACATAGTGCTGGAAATTCCATGTAATTACAACCTTT\n+GCTTGGTATTCATGTGTATTAGTCTCGTAGAAATAAGGTGTTTTTTCACA\n+AACAACAATCTTGTTTGACTCAATGTGGGCAGGCAATAGCATCTGAGCAG\n+CTTCAATATGAACTGTACACAAGAATCCAGCGTGAGCGCTTTTGGGTTTT\n+GGTAAGTTTTCTATCTCTAAACGAATTTCTTTTGGCACCCGCACCGGTAA\n+AAGAATCGCCGGACGATTGCTTTTTAAATGGGGGCATTGACCA
ACACTAG\n+TTACCGAATTTTCTATATTACGACATTGTATTGATTGATGGACACATTTA\n+TTGTCAAAAATACACC'..b'CCACTGGACCCGTTGGATAGCGTC\n+GGCACTTTTCTGTTTGGTGCCATTCACTGGTCACTAGTAGGACCGCTCTG\n+CCTATTAAAATAGGAGTGCTGCTTAGAATTTATTTCAGCAAGACAGCCGT\n+AAGATATATATGAATAGTATTTAACGGCAGTGGCAGAGGCCTTTACAATA\n+TAAGGCTTTACGGGATTTTAAACAATGAAAAACAAAAATATTTTTATAGA\n+GTCGAGATAAAATACTTTTTACAATAAATACTAAGTATTTTTAAAAGATA\n+AAAATATTTAAAAGTAAATACTGAATATTTTAACAGGTGTTAAGTACTGG\n+GAACTGTGACTAAGGACGGCGGTATATCGCTACCATAATATTTTTAAAGG\n+AACTGAATACTGAAAGATGTGACCAAAATGGTAGAATATCGCTAACGAAA\n+ATCACAGACGCAAATATCGATAGTGGCCTAGAGACCCGATATCGCCGCCG\n+TGATTATCGATGAAACTACAGCTGAGCTGCTTGTTTACATTCTAAAGTTA\n+AAATTTTTAAAAATTTCCAATACAAAAAAAAATTCTTCGTATTAACACGG\n+CTGTTTGCCGGTAAGATCTGAAGTGGATTCAAGTTGCAGCACCCTCAGGA\n+AAAGAATAAATTATTATAATATAATAATATAATATAAAGAATTTAAATTC\n+GAACCAAACGCGGTATGTCACTCTTCATTTAGAGAATTTGTGGCTTTGTT\n+TTGTACAAAAAACTATTCTTTTTTCTTGAAAATTAGCATCAAGAATTTAT\n+TGCCAAAACAGAACGCAATTCTGCTAAATCTGGTTACCCTGCCAAATGGC\n+TGGCCGTTCTGTGCATTCAATAACACAAGCTAACGGCTTACGGCACATTC\n+GGAAAGATCCAAAAGAATTTCTATTCCGGCATTTTTTATGTATTATCGTC\n+CAATTTTTGTATGTGTATTCATTCTTTTGGCTGTCCGTTATTAAGCTTTT\n+CTTATCCACATAAGCTGACCAGCTGCCAAAAAGCCGACCGTTTTGTGCAG\n+CCAATAACCCAAGCTAACTATCAACAGCAATTTCGAAAAATCCAGAAGAA\n+GCTATCCTTCGGCAATTTTTTCTTATATTATAATTTTTTTGTCACAATTT\n+TTTGTCAAAAAATCGACAATAATTAAGTGTTTACATTTTAATGCGATTTA\n+ATTGGAAATTTGAATTCGAATCAAACGCGGTGTCACTCTACATATGGACA\n+ATTTGTGGCTACAAAATTAGTTGAAAGTGCATGTTTTTTTTTAAGTGAAA\n+TAAAACAAAAACAGCAATAACAGGCCAAAATGTTGTTTTATAAATAAATA\n+CTTACATATCCGCATATAAATATGAATGAAAAAAAAAACAGCAAGACTGG\n+GCCAAAATATTGTTCTTTAATATGTAAATAAATACATATCTACATATACA\n+TATGTGGACAAAAGTGTTAAAATTAGTTAATAGTGCATGTTTTTTAAATG\n+AAAAAATAAAACAAAAAAAGCAAGCAAATATAAATTATATTCCACACACA\n+TTTACAATAATTTCAAACTACGCTAAAAATACCCAACAGAAAAAACATAG\n+AAAGGTAAACAAAATACAAGAATATCAAAAAAAATCTAATTTTCACGTTG\n+TTGTGGCCGGTACCACCGGTCATACATACATATAGATACATATTTAGATT\n+CTCACAAAAAGAATAGAAAATTACCGCTAATTTCCTTATATACGTTTTTG\n+TTTTCAGTGTTTTGAAATAGTATTTTTCAAACAAAAAACAATACAAAAGG\n+GTTAAAAAAATTGTTCGATTGGCTTTAACATACT
AATTAAAAAAAATCAC\n+AATTTTTTGGCAAAATATCGAAAAAAATTAAGTGTTTACATTTTTATGCG\n+AATTATTTGGAAATTTAAATAAGAACCCAACGCGGTTTGTCACTCTACAT\n+ATGGACAATTTGTGGCTTTGTTTTGAACAAAAAATGAATTTTTTTTTACT\n+CAAAATTAACAAGAAGAAACAGTAATGCGGGTTTTGATCGCAATTCTGCT\n+AAATCGAATTTCCGTGCCCAAAGGCTGACCTTTTTGTGCAGCAAATAGCC\n+CAAGCTAATAATCTACAGCAATTTCTTAAAGACCGAAAATGTATCAACTT\n+CGGCAAAATATGAAAATTTAACATTTATTTTTCAACATTTTTTGCCAAAT\n+AATCGACAAAAATTTCAGTGTCCCGATTTGGATGCGAATTAATTGAGGAC\n+AATTGTGACTTTGTTTTGTAAAAAAACTATTTTTTTTACTGAAAATTACA\n+ATCAAGAATCTAAGGCCAAATCAGAATATTTTCACTGCGGTTTTTTGATC\n+GCAATTCTGCTAAATCGGGTTCCGTGCCAAAAAGCTGACTATTTTTTTTA\n+CTGAGAATTAGCATCAAGAATTTATTGCCAAAACAGAACGCAATTCTGCT\n+AAATCGGGTTGCCCTGCCAAAAGGCTGGCCGTTTTGCGCATTCAATAACA\n+CAAGCTAACGGTTTACAGCACATTCGGAAAGATCCAAAAGAATTTCTATT\n+TCGGCATTTTTTATGTATTATCGTCTAATTTTGGTATGTGTATTCATTCT\n+TTTGGCTGTCCGTTATTAAGCTGCTTTTCTTATTCACATATTCAATGGCG\n+CAGTACATAAAAAGAATGAACCGTGGTTTTCTTGCCCTCTCCATCTCCCT\n+CTACCACTTCCCCTCTCAACAACTATCTCACGCACTCTGTCCCTTCTCGT\n+CTCTGTCACCCTCTCTTTACCTACCTCACTCTGGCACACTCTCTCTACCT\n+ATCTCACTCTGTCACCTTCTTCTACCTAATTCTCTCTGTCGCGCTCGCTC\n+TCTCAACATGCCTTTCGCACCGTATCTCTTCGTTTCTCTGTATGCTTGCT\n+CTCTTAATTTAAGTTTCTCTTTCTTTGTGTTCAATATCTTTCGCGCTCGT\n+GACTCCTATTGACCGGACTCTGAGCGGTGCTCATTAAGAGTGGAGTTTTC\n+GATGATGGCGGGTGGCGCGAACGAGAATTTTTTACATAAAGGTAAGTTGC\n+CAATTCTTTTTTAAATGTGACATACAGCATGCATATTTTATTTATTGTAA\n+TTATATGTGAAAGAATAAAATTATATGTACTTTTATCTAATCTATTGCAT\n+CTATTTTTTCACAGGTAAACGAGTCACACGAGAAGTAAAACACAAACACA\n+AGGATCAAAATAAAATTCAGGTGAGTGAACGAACGTGTTGAAAGTAGTTA\n+ATAGTGCATGTCTATTTAAGTGTAAGAAACAAAAAAGACACAAATGGGCC\n+AATAATTCGTTTCTTTAATAGATAACTGCCTATATTATGTTCAAACTATG\n+CTAAAAAGACCCAAAAGGAAAACATAAAGTATATACCTTGCAAAATGAAT\n+AAAAAGAAATCATATATATTCATATACGTTTGACCGGTACCACCGGTCAT\n+ACATACATAAACATAAACACAAAAAGACTAGAATATAAATACCATATTTT\n+TGATTGGCTTAAAAATGGCACAAAAAAAAGATAAAAACTAGAGTGTCTAG\n+CTTTAAGTGACAATCGACTCGGAAAATGATCAAGAAATAAATGAAAAATT\n+TGTCTTGCGGCTTTTTGGTCGCAATTCTAAATCGGTTTGTATGTATGTCA\n+CTCTTCATGTATACAATTTGGTGGCTTTGTATTCCACAAAAGTCACAGTA\n+GTCACAAAAGTTTTTTTTAC
TGAAAATTACCATCAAGAGTCTATGGCCAA\n+ATCAGAATATTTTCACTGCGGTTTTTTATTTCAATTCTGCTAAATCGGGT\n+TTCCGTGCCAAAAAGCTTTTGCAGCCAACAACAATATTCAATTTTTTGGC\n+AAAAAATCGACAAAAAAATT\n'
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/trfBig/hubaInputs/dbia3_trfBig_sorted.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/trfBig/hubaInputs/dbia3_trfBig_sorted.bed Wed Jul 13 13:36:37 2016 -0400
b
b'@@ -0,0 +1,602 @@\n+contig1\t9130\t9428\ttrf\t163\t1.8\t164\t86\t2\t438\t36\t12\t13\t37\t1.82\tAAAAAAAATTATATCTTCGGTGTTTTTCAACATACAACCTCCTAAGCTTGGAAATAACATTTCTTAATCAGTTCTGAATTTCGAATTAAATTTTTATCAAAATCGGACAACTATACCATATAGCTGTCATAGGAAGGATTGGATAATTAGTGGTAAAATAATAT\n+contig1\t15707\t15757\ttrf\t20\t2.5\t19\t83\t9\t55\t32\t12\t2\t54\t1.49\tTATATCATTTATATCTCAG\n+contig1\t16261\t16302\ttrf\t12\t3.4\t12\t89\t0\t64\t2\t36\t24\t36\t1.69\tTCCGTCTGTCCG\n+contig1\t23387\t23483\ttrf\t45\t2.2\t45\t92\t1\t158\t19\t22\t30\t27\t1.98\tTCGGCGAAGAAATGTGCCACTTCGGCGGCACTTCTTGGAGTCACT\n+contig1\t23451\t23520\ttrf\t24\t2.9\t24\t80\t0\t75\t15\t26\t27\t30\t1.96\tTTCGGCGACACTTCTTGAAGTCAG\n+contig1\t24200\t24233\ttrf\t15\t2.2\t15\t94\t0\t57\t51\t12\t15\t21\t1.75\tAAACAATGGAATGCT\n+contig1\t29159\t29628\ttrf\t162\t2.9\t160\t80\t8\t518\t37\t12\t11\t38\t1.80\tGGAAAACATGAAATAAAAATTATATCTTTCGTGTTTTTTAACATATACCTTCTAAGCTTGAAAATAACATTTTTTATTTGTTCTGAATTTCGAATTAAATTTTATCAAAATCGGACGACTATATCATATAGCTGTCATAGGAACAATCGGAAAATAAGTA\n+contig1\t37571\t37606\ttrf\t8\t4.4\t8\t88\t0\t52\t31\t25\t42\t0\t1.55\tGACGGACA\n+contig1\t38436\t38491\ttrf\t12\t4.6\t12\t81\t0\t65\t0\t38\t27\t34\t1.57\tTGTCCGTCCGTC\n+contig1\t38436\t38491\ttrf\t20\t3.1\t18\t80\t17\t69\t0\t38\t27\t34\t1.57\tTGTCCGTCTGTCCGTCCG\n+contig1\t38436\t38491\ttrf\t8\t6.9\t8\t79\t8\t58\t0\t38\t27\t34\t1.57\tTGTCCGTC\n+contig1\t43116\t43168\ttrf\t2\t26.0\t2\t96\t0\t95\t48\t0\t1\t50\t1.12\tTA\n+contig10\t7159\t7189\ttrf\t13\t2.3\t13\t94\t0\t51\t36\t10\t16\t36\t1.82\tAATGATGCATTTA\n+contig10\t7363\t7540\ttrf\t101\t1.8\t95\t85\t7\t246\t44\t9\t10\t36\t1.70\tTTTTTATAAAATTGAATTCGAAATTCAGAACCAATTAAAAAATATTATTTATAAGAAGGTTATATGTTAAAAAACACAGTCGATATGATATAGTC\n+contig10\t8034\t8065\ttrf\t16\t1.9\t16\t93\t0\t53\t38\t25\t35\t0\t1.56\tACAGACAGACGGACGA\n+contig10\t16407\t16472\ttrf\t24\t2.7\t24\t88\t4\t87\t23\t23\t33\t20\t1.97\tGAAGTGTCGCCGAAGTGACTCCTG\n+contig10\t17443\t17508\ttrf\t24\t2.7\t24\t88\t4\t87\t23\t24\t33\t18\t1.96\tGAAGTGTCGCCGAAGTGACTCCTG\n+contig10\t18419\t1
8897\ttrf\t162\t3.0\t162\t78\t8\t408\t35\t15\t12\t36\t1.85\tCCCAAAGATAATTTTTCCATATTATTTTACCACTAATTTTCCGATCCTTCATATGGCAGCAATATGATATAGTCATCCGATTTCGATAAAAATTGAATTCAAAATTCAGAACTAATTAAAAATGGTTATATCCAAGCTTAGAAAGCTATATGTTAAAAATAA\n+contig10\t28438\t28868\ttrf\t163\t2.7\t160\t78\t7\t465\t38\t12\t13\t35\t1.82\tAACATTTTTTAATTAGTTCTGAATTTAAATTTAAATTTTATTAAAATCGGACGACTATATCATATAGCTGCCATAGGAACGATCGGAAAATTAGTGGAAAATAATAATAAAAAAATTATATCTTCGGTGTTTTTAACATATAACTCCAAAGCTTAAAAAT\n+contig10\t32277\t32320\ttrf\t2\t21.5\t2\t95\t0\t77\t48\t0\t0\t51\t1.00\tAT\n+contig10\t34149\t34186\ttrf\t19\t1.9\t19\t94\t0\t65\t32\t13\t37\t16\t1.87\tGCTGAGGATGAGATACAGA\n+contig11\t7433\t7863\ttrf\t163\t2.7\t160\t78\t7\t465\t38\t12\t13\t35\t1.82\tAACATTTTTTAATTAGTTCTGAATTTAAATTTAAATTTTATTAAAATCGGACGACTATATCATATAGCTGCCATAGGAACGATCGGAAAATTAGTGGAAAATAATAATAAAAAAATTATATCTTCGGTGTTTTTAACATATAACTCCAAAGCTTAAAAAT\n+contig11\t11272\t11315\ttrf\t2\t21.5\t2\t95\t0\t77\t48\t0\t0\t51\t1.00\tAT\n+contig11\t13144\t13181\ttrf\t19\t1.9\t19\t94\t0\t65\t32\t13\t37\t16\t1.87\tGCTGAGGATGAGATACAGA\n+contig11\t22738\t22776\ttrf\t20\t1.9\t20\t88\t0\t58\t34\t13\t7\t44\t1.72\tTATATATATAGAACCTGTTC\n+contig11\t25115\t25366\ttrf\t138\t1.8\t138\t95\t0\t457\t36\t13\t9\t40\t1.77\tCAAATTTTTTGTTTAAAACCGTTTTGGACTCTAAGGCTATGCAATGCATATAACGTTATAAAAAAAGTATTTACTTTTTTAACAAATTTATAACTTACCTATAACATATAACAAGAATACCTTTTGTTTACATTTTAC\n+contig11\t25870\t25921\ttrf\t18\t2.8\t18\t91\t5\t77\t25\t23\t0\t50\t1.49\tTCATCTATATCTTTCATA\n+contig11\t25872\t25939\ttrf\t23\t2.9\t23\t75\t6\t64\t23\t22\t1\t52\t1.56\tATCTTTCATTTCATATCATCTAT\n+contig11\t31165\t31562\ttrf\t159\t2.5\t155\t82\t6\t458\t38\t12\t13\t35\t1.82\tTTTTGAATTAAATTTTATCAAAATCGGACGACTATATCATATAGCTGCCATAGAACGATCGGAAAATTAGTGGAAAATATAAAAAAATTATATCTTCGGTGACTTTAACATATAACTTCCAATACTTGAAAATACAATTTTTAATTAGTTCTAAA\n+contig12\t2269\t2312\ttrf\t2\t21.5\t2\t95\t0\t77\t48\t0\t0\t51\t1.00\tAT\n+contig12\t4141\t4178\ttrf\t19\t1.9\t19\t94\t0\t65\t32\t13\t37\t16\t1.87\tGCTGAGGATGAGATACAGA\n+contig12\t13735\t13773\ttrf\t20
\t1.9\t20\t88\t0\t58\t34\t13\t7\t44\t1.72\tTATATATATAGAACCTGTTC\n+contig12\t16112\t16363\ttrf\t138\t1.8\t138\t95\t0\t457\t36\t13\t9\t40\t1.77\tCAAATTTTTTGTTTAAAACCGTTTTGGACTCTAAGGCTATGCAATGCATATAACGTTATAAAAAAAGTATTTACTTTTTTAACAAATTTATAACTTACCTATAACATATAACAAGAATACCTTTTGTTTACATTTTAC\n+contig12\t16867\t16918\ttrf\t18\t2.8\t18\t91\t5\t77\t25\t23\t0\t50\t1.49\tTCATCTATATCTTTCATA\n+contig12\t16869\t16936\ttrf\t23\t2.9\t23\t75\t6\t64\t23\t22\t1\t52\t1.56\tATCTTTCATTTCATATCATCTAT\n+contig12\t22162\t22559\ttrf\t159\t2.5\t155\t82\t6\t458\t38\t12\t13\t35\t1.82\tTTTTGAATTAAATTTTATCA'..b'AAAAATTATATCCTTGGTATTTTTTAACATATAACAATATAAGCTAGAAAAG\n+contig70\t17962\t18295\ttrf\t161\t2.1\t160\t83\t5\t413\t37\t12\t13\t35\t1.84\tAAAAAATTATATCTCTGGTGTTTTTAAACATATAACCTCCTAAACTTGGAAATAACATTTTATAATTAGTTCTGAATTTCGAATTAAATTTTATCAAAACCCGACGACTATATCATACATGTAACTGTAACGATCGGAAAATTGATGGGAAAATAATATG\n+contig70\t19030\t19306\ttrf\t137\t2.0\t137\t94\t1\t482\t39\t13\t13\t34\t1.83\tTTTAACACATACCTTTCTAAGCTTGGATATAACATTTTTAAACTGGTTCTGAATTTCAAATTAAATTCAATTAAAATCGGACGACTATATCATATAGCTCCCATAGGAAAAATCGGAAAATTAGTGAGAAAATAATA\n+contig70\t22269\t22308\ttrf\t19\t2.1\t19\t100\t0\t78\t58\t15\t15\t10\t1.62\tAACTAAGGAAATACCAGAA\n+contig70\t22812\t23252\ttrf\t162\t2.8\t156\t84\t5\t562\t37\t11\t13\t37\t1.81\tTTCTAAGCTTGAAATAACATTTTTTAATTAGTTCTGAATTTCGAATTTAATTTTATTAAAATCGGACGACTATATCATATAGCTGCCATAGGAACGATCGGAAAATTAGTGGAAAATAATAAATAAAAATTATATTTGTATTTTTAACATATAACC\n+contig70\t23421\t23486\ttrf\t13\t4.7\t13\t70\t20\t51\t38\t0\t0\t61\t0.96\tATATATATAATTT\n+contig70\t23426\t23479\ttrf\t7\t7.4\t7\t79\t12\t54\t35\t0\t0\t64\t0.94\tTATATTT\n+contig70\t23430\t23486\ttrf\t27\t2.0\t27\t83\t10\t69\t35\t0\t0\t64\t0.94\tATTTATATATTTATTAATATATTTTAT\n+contig70\t25576\t25606\ttrf\t12\t2.5\t12\t94\t0\t51\t33\t26\t36\t3\t1.73\tACGGACGGACAG\n+contig70\t27263\t27382\ttrf\t63\t1.9\t63\t85\t3\t168\t27\t25\t21\t26\t1.99\tATTTTTCCAAGCACTTTCCTGTACAAGGGAAACGTCCCAGGGAAAGCCTATCGGAATTTCAAA\n+contig70\t27353\t27406\ttrf\t25\t2.1\t25\t85\t0\t70\t30\t26\t3
2\t11\t1.91\tGGAAACGTCCCAGGGAAAGCCCATC\n+contig70\t27848\t27878\ttrf\t12\t2.5\t12\t88\t0\t51\t30\t30\t40\t0\t1.57\tACGGACGGACAG\n+contig70\t35504\t35713\ttrf\t102\t2.0\t102\t99\t0\t409\t37\t12\t13\t36\t1.82\tAAAACGATGGTAGACAAATATGCATATATTTTTTACACAAAACGAAATATAATGGACTTTTAAAAATTCTTTTTTCTATCTTTCCTGGTGGGAGATATATAT\n+contig70\t43333\t43446\ttrf\t57\t2.0\t57\t94\t0\t199\t30\t17\t22\t29\t1.97\tAATATTGGGAATAACATATTATCTTATAATATGGGAGCGCGAAGGCTCCTCGCCCAT\n+contig70\t44241\t44281\ttrf\t13\t2.9\t14\t85\t7\t55\t27\t22\t12\t37\t1.90\tCTACATGTTACATG\n+contig70\t44242\t44281\ttrf\t7\t5.7\t7\t81\t6\t53\t28\t20\t12\t38\t1.89\tTACATGT\n+contig70\t44485\t44531\ttrf\t14\t3.3\t14\t84\t0\t65\t41\t21\t10\t26\t1.86\tAACCGTATATGACT\n+contig70\t45147\t45181\ttrf\t16\t2.0\t17\t88\t11\t52\t52\t0\t2\t44\t1.16\tAAATATTAGTAATATAT\n+contig70\t46706\t46990\ttrf\t147\t1.9\t147\t100\t0\t568\t39\t10\t9\t40\t1.72\tTTCTTTATTTTTTTTATTTTAAAATACTTAGTACTTAGTAATGTCGCTAAAACCAATATAATATTCTTTAAAATTTAGAAAATATATTCAGACTTCGAATTACAAGGTGTATTAGAAGATAAATTAAACTCTAAAGCTTAATTTATC\n+contig70\t53900\t53978\ttrf\t28\t2.8\t28\t80\t5\t97\t15\t43\t5\t35\t1.69\tCTCTGTCACCCTCTCTTTACCTACCTCA\n+contig8\t2833\t2903\ttrf\t24\t2.9\t25\t83\t8\t92\t15\t27\t27\t30\t1.96\tCTTCGGCGACACTTCTTGGAAGTCA\n+contig8\t22744\t22922\ttrf\t88\t2.0\t88\t97\t0\t338\t28\t16\t24\t30\t1.96\tATGGGAGCTATAAGATATAGTTGTCCCATCCGGCAGGTTTCGACTTATATATTGCCTGCCATAGAAAGGAAACTTTTGGGAAAGTTTC\n+contig8\t29631\t30109\ttrf\t162\t3.0\t159\t81\t4\t573\t36\t12\t14\t36\t1.84\tAAAAATTATATCTTCGGTGTTTTTTAACATATAACCTTCTAAGCTTGGAAATAACAATTTTTAATTAGTTCTGAATTTCGAATTAAATTTTATCAAAATCGGACGACTATATCATATAGCTGCCATAGGAACAATCGGAAAATTAGTGGAAAATGAAAT\n+contig9\t2833\t2903\ttrf\t24\t2.9\t25\t83\t8\t92\t15\t27\t27\t30\t1.96\tCTTCGGCGACACTTCTTGGAAGTCA\n+contig9\t22744\t22922\ttrf\t88\t2.0\t88\t97\t0\t338\t28\t16\t24\t30\t1.96\tATGGGAGCTATAAGATATAGTTGTCCCATCCGGCAGGTTTCGACTTATATATTGCCTGCCATAGAAAGGAAACTTTTGGGAAAGTTTC\n+contig9\t29631\t30109\ttrf\t162\t3.0\t159\t81\t4\t573\t36\t12\t14\t36\t1.84\tAAAAATTAT
ATCTTCGGTGTTTTTTAACATATAACCTTCTAAGCTTGGAAATAACAATTTTTAATTAGTTCTGAATTTCGAATTAAATTTTATCAAAATCGGACGACTATATCATATAGCTGCCATAGGAACAATCGGAAAATTAGTGGAAAATGAAAT\n+contig9\t37064\t37094\ttrf\t12\t2.5\t12\t100\t0\t60\t0\t40\t26\t33\t1.57\tGTCCGTCTGTCC\n+contig9\t37275\t37670\ttrf\t162\t2.4\t160\t82\t10\t455\t37\t12\t13\t35\t1.83\tATAACAATTTTTATTTGTTTTGAATTTCGAATTAAATTTATCAAAATCGGACGACTATATCATATAGCTGCCAAGAGAAACAATCGGAAAATTAGTGGAAAAATAACATTGAAAAAGTATATCTTCGGTGTTTCTTAACATACAACCTCATAAGCTTGAA\n+contig9\t37280\t37752\ttrf\t161\t2.9\t161\t80\t6\t492\t37\t11\t15\t35\t1.84\tATTTTTTAATTAGTTCTGAATTTCGAATTAAATTTTATCAAAATCGGACGACTATATCATATAGCTGCAATAGGAACAATCGGAAAATTAGTGGGAAATACATGTGAAAAAATTATATCTTTGGTGTTTTTAACATATAACCTTATAAGCTTGGAAATACA\n+contig9\t56173\t56203\ttrf\t13\t2.3\t13\t94\t0\t51\t36\t10\t16\t36\t1.82\tAATGATGCATTTA\n+contig9\t56377\t56554\ttrf\t101\t1.8\t95\t85\t7\t246\t44\t9\t10\t36\t1.70\tTTTTTATAAAATTGAATTCGAAATTCAGAACCAATTAAAAAATATTATTTATAAGAAGGTTATATGTTAAAAAACACAGTCGATATGATATAGTC\n+contig9\t57048\t57079\ttrf\t16\t1.9\t16\t93\t0\t53\t38\t25\t35\t0\t1.56\tACAGACAGACGGACGA\n'
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/trfBig/output/dbia3_trfBig.bb
b
Binary file test-data/trfBig/output/dbia3_trfBig.bb has changed
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/trfBig/workflowInputs/dbia3.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/trfBig/workflowInputs/dbia3.fa Wed Jul 13 13:36:37 2016 -0400
b
b'@@ -0,0 +1,66784 @@\n+>contig1\n+AAAACTAATTTTATCAAAATCGGACAACTATATCATATAGCTGCCATACG\n+AACGATCGGAAAATTGGTAAGTAAATAATTAAAAATATTATATCTTTGGT\n+GTGTTTTTGGAAATAACATTTTTTAATATGTTTTGAATTTTGAATTAAAT\n+TCTATCAAAATCGGACGACTATATCATATAGCTGCCATACAAACGATCGG\n+AAAATTGGTGAATAAATAATATGAAAAAATTATATCTTTGGTTATTTTTT\n+AGACAAATAACCTCCAACGCTTGGAAATAACATTTTTTAATTAGTTATAA\n+ATTTGATATTTTAATTTTATCAAAATCGGACGAATATAGCATATAGCTGT\n+TAAAATAATATGAAACAAATTATAGCTCCGGTGTTTTTTACATATTATCT\n+TATACTATTGGGAAAATAGTTTATTATATTTTAAAGAATTTCCAATTAAA\n+CTCTAACATATAGCTTTCAAAGAAACGGTCAAAAAAGTAAAGAAATCATT\n+TTTTTTTAACATCACTGAAGCTAGAAACAATCCTTAAAAATGTAACATGG\n+TGTTAGTAGCATTGAAAATTGCTTATAACTGCAAAGGGAAAACAAACATC\n+GGCTTGCCGAATGTAATTTCCATTCTTGTTTGACTTGAGTTTATAACTTA\n+CAATTATGGTAAGGTGCCTGATTTTGGTTTTTGCCATACGGTATGTGGTA\n+GTTATCATTTTGCCGTGCTATAAGTGACATCACAGCCGATTCTTTTACTC\n+CATAATGGGCCAATGTGTTGAGGCGTTTCCATCCATTTATTGTTTTTGTA\n+GTTAGGTCTTCATCTTGTAATGTGAGATGACCTCCTCTTCCATGCCTCCA\n+TTCTAAGTCAACTTCGTGCACAGATGGTCTCATAGAAAACGGAGTATTTT\n+TAAAAATTGCGTCCAAAATTTTTAATTTAACTTGCGATATGGTATCCAAA\n+TCGTTTACACGACATTGGACTTTTTCATCAAGATCATCTTGTAAAATGTG\n+CAAAATTACAACAGAATGGGTAACTTGTTCGTGGAGAAGTCGTTCCTCTG\n+AAAGTGAATAACGAGCATCGTGGGTTATTGCGTCCACCAAACCCTTTTCA\n+ATTTGATGCTTAATTGCCTTGAACAGCAAAAATAAATTAGATCCAGCATA\n+TTCTTTTAGGTAGTCGTACATACAAATTGCTAAGTAGTTTGTTAACATTT\n+TTTCAACTACGCTCTCAGTGCGTCGTAGCATTAGCTGAGGATGCTTGCTG\n+GCGAGCGATTTGTCAATTAATCGCAATAAAAGGGACTTTAAAATTTCCGT\n+CGCATATTCCATTTTGTTCATTAGAACAACCATAAGTAAAGAGGCGACGT\n+TAACTCGATCGCGAATTGAGAAAGATGACCGTTGAGCTTCTAAAGTTTCT\n+ATGAACAATAGTAAAAAATATTTGTTTCCAATAAGTTGCTCGAATTGTAT\n+CATAGCTGCATCATAGTTAGTGTGCGGGCTACTTCCACAAAATTTTCGGG\n+AGTTTAGAATAGGATGATCTGATACACCGGGAAAGAAAACTTTCATAATG\n+TAATTGACGTGATCTAACGTTGGTATACCGGTGCTCTCCAAATCTGCTGT\n+TAGATCGGTCATGTCCGTTTGGAGCTCAGCAAATGCCTGTTTACACTCAG\n+AACGAACGTTGCTTTCCAATGTTATCATCTGTATCTGAATTCGTTTGTAT\n+TCCCGTTCTGCTTGGGTCGATTTCCTTCTAAATATTATTAGTACGACAAC\n+CAAAACGATGACAAGCACTGCAACTGTCAATATAACGACAAACATGGCAT\n+GTGAAAAAACATAGGGTTTATTTAAATCATATTTCAAATATCCTATGGCG\n+AAAC
GAAGATTTCGCCCTACTTTAACCACAACTAGAGGTAAATCTGTCGA\n+TTGATCCACACCATTTTCATCAGTTGGGAGTGGTTGATGTTCCGGTGGAA\n+TGCACAAAAGTTGAGTTAGTGTAAGGCTTGTTATATTGCATTGAGTAGTA\n+CCAATGGTTACATTAACGTCGTATTCATCAGCTGCCAAATTTAGTAGCTC\n+GCCTTCAATAACCAAGCTGTCACCCTTGTATAGTTTAATTCCGTCATTTG\n+GAAATGGTAAATATTTTGGATCGGCTAAATAAACAATAGTGCTCCTGATA\n+TCATGAAAGTATTTATTTAGATCTCGAACAAGCTGTACATTGTCCATTAC\n+AAAACTAAGTTGTAAATTCAGTTGCGTTTCATGGACCTTTACAAAAGTTG\n+TAACATCCATATTATTATTGACGAAATAGCTAGGCGCAGTTCCCCCTGTT\n+GTGTATATATGAAAGTTGTCTACGAAGTCTGCTTTTCGCTTTCGTCTAGA\n+ATATTCGTTCTGATGCGCACTTTCATTTTTAAAGTCAAATTCCGAATCCA\n+TCACTCTGTTAGAACTTTTAAATGCTTGAAATTTATCATTTACCGGAGGA\n+GATGGGCATTCCATTTGATTTGAATTGATTACTACGCAAGATGTTTTGTT\n+TACTCTTTCATTATCAAAAAAAACTTCAAGTTCAGGCTTTTGAATTGAAT\n+TAAGATACATTCCATGAACGGTCAGAACGCGACCACCACTTACAAAACTG\n+CGTAAAGGCTTAATCTGCATTATACGCGGGTCTTGGGTATAGTTAAAAAT\n+AGAACAGGGCTGCCTTGGTAACGTACGAAATTGGTATGAACCGAAATTAC\n+TTCGAGTTAGATTTTTATTGGGTATAGTTGCGGGTGAGATATGACATTCT\n+AAAGTTCGGTTGGCACCGTCAATAAGAAGGTGTAGAGACCGTATTGGTTC\n+TGGCTGGGTCGCCTCTGATGTAGTACTACTAACTTGAGTTGAAGAAGCTT\n+GTGTAACGTTTATATGACACTCGTACTCATCTAAATATGCTCGCATGGTT\n+GATCCAATGTTCAAAAACTTTCCTATTAACGACAATTGTGTTCCTCCCGA\n+CCTGGGCCCAATCGTGGGATATAATCCTGTTAGCAAAACATTCTTAAAGT\n+GAAATTGTACACTAGATTCAGTATACCCTGCATCATTTGCAACCTTTATA\n+GGCGCTGACATTTCATACATCACTGCTCCAGTGCGGCATTCGATCTTTAC\n+AGAAATTTGGTAGTTGACTAGTTCACAAGGCACAGACCCAATAAATATTT\n+TTCCACGCACATCTCCTTCGCGAATGCCCAAATTACTGCCTTCAATTGTA\n+ATAAGGGTACCACCCTCAATAGGTCCAGATAACGGTTTAATAATATCGAT\n+CCTTGGTAAAGGGCACTCGTTTTCTTTAGTTGTTTTTGATCCTGCACTAA\n+TAGCACGATCACTGTCTGCTATGCAAGTCTCGTTATATACACATGAGTTG\n+CTGCACCAAGCACATTGGTATTTTGGATCACGAGTGACACACAAACTGCA\n+ATCAGCATGATCCCGATGTGAGCCCAGTACGTCGCATTTATATAATGTAA\n+CAATCGCCGTGTCCACATAGTGCTGGAAATTCCATGTAATTACAACCTTT\n+GCTTGGTATTCATGTGTATTAGTCTCGTAGAAATAAGGTGTTTTTTCACA\n+AACAACAATCTTGTTTGACTCAATGTGGGCAGGCAATAGCATCTGAGCAG\n+CTTCAATATGAACTGTACACAAGAATCCAGCGTGAGCGCTTTTGGGTTTT\n+GGTAAGTTTTCTATCTCTAAACGAATTTCTTTTGGCACCCGCACCGGTAA\n+AAGAATCGCCGGACGATTGCTTTTTAAATGGGGGCATTGACCA
ACACTAG\n+TTACCGAATTTTCTATATTACGACATTGTATTGATTGATGGACACATTTA\n+TTGTCAAAAATACACC'..b'CCACTGGACCCGTTGGATAGCGTC\n+GGCACTTTTCTGTTTGGTGCCATTCACTGGTCACTAGTAGGACCGCTCTG\n+CCTATTAAAATAGGAGTGCTGCTTAGAATTTATTTCAGCAAGACAGCCGT\n+AAGATATATATGAATAGTATTTAACGGCAGTGGCAGAGGCCTTTACAATA\n+TAAGGCTTTACGGGATTTTAAACAATGAAAAACAAAAATATTTTTATAGA\n+GTCGAGATAAAATACTTTTTACAATAAATACTAAGTATTTTTAAAAGATA\n+AAAATATTTAAAAGTAAATACTGAATATTTTAACAGGTGTTAAGTACTGG\n+GAACTGTGACTAAGGACGGCGGTATATCGCTACCATAATATTTTTAAAGG\n+AACTGAATACTGAAAGATGTGACCAAAATGGTAGAATATCGCTAACGAAA\n+ATCACAGACGCAAATATCGATAGTGGCCTAGAGACCCGATATCGCCGCCG\n+TGATTATCGATGAAACTACAGCTGAGCTGCTTGTTTACATTCTAAAGTTA\n+AAATTTTTAAAAATTTCCAATACAAAAAAAAATTCTTCGTATTAACACGG\n+CTGTTTGCCGGTAAGATCTGAAGTGGATTCAAGTTGCAGCACCCTCAGGA\n+AAAGAATAAATTATTATAATATAATAATATAATATAAAGAATTTAAATTC\n+GAACCAAACGCGGTATGTCACTCTTCATTTAGAGAATTTGTGGCTTTGTT\n+TTGTACAAAAAACTATTCTTTTTTCTTGAAAATTAGCATCAAGAATTTAT\n+TGCCAAAACAGAACGCAATTCTGCTAAATCTGGTTACCCTGCCAAATGGC\n+TGGCCGTTCTGTGCATTCAATAACACAAGCTAACGGCTTACGGCACATTC\n+GGAAAGATCCAAAAGAATTTCTATTCCGGCATTTTTTATGTATTATCGTC\n+CAATTTTTGTATGTGTATTCATTCTTTTGGCTGTCCGTTATTAAGCTTTT\n+CTTATCCACATAAGCTGACCAGCTGCCAAAAAGCCGACCGTTTTGTGCAG\n+CCAATAACCCAAGCTAACTATCAACAGCAATTTCGAAAAATCCAGAAGAA\n+GCTATCCTTCGGCAATTTTTTCTTATATTATAATTTTTTTGTCACAATTT\n+TTTGTCAAAAAATCGACAATAATTAAGTGTTTACATTTTAATGCGATTTA\n+ATTGGAAATTTGAATTCGAATCAAACGCGGTGTCACTCTACATATGGACA\n+ATTTGTGGCTACAAAATTAGTTGAAAGTGCATGTTTTTTTTTAAGTGAAA\n+TAAAACAAAAACAGCAATAACAGGCCAAAATGTTGTTTTATAAATAAATA\n+CTTACATATCCGCATATAAATATGAATGAAAAAAAAAACAGCAAGACTGG\n+GCCAAAATATTGTTCTTTAATATGTAAATAAATACATATCTACATATACA\n+TATGTGGACAAAAGTGTTAAAATTAGTTAATAGTGCATGTTTTTTAAATG\n+AAAAAATAAAACAAAAAAAGCAAGCAAATATAAATTATATTCCACACACA\n+TTTACAATAATTTCAAACTACGCTAAAAATACCCAACAGAAAAAACATAG\n+AAAGGTAAACAAAATACAAGAATATCAAAAAAAATCTAATTTTCACGTTG\n+TTGTGGCCGGTACCACCGGTCATACATACATATAGATACATATTTAGATT\n+CTCACAAAAAGAATAGAAAATTACCGCTAATTTCCTTATATACGTTTTTG\n+TTTTCAGTGTTTTGAAATAGTATTTTTCAAACAAAAAACAATACAAAAGG\n+GTTAAAAAAATTGTTCGATTGGCTTTAACATACT
AATTAAAAAAAATCAC\n+AATTTTTTGGCAAAATATCGAAAAAAATTAAGTGTTTACATTTTTATGCG\n+AATTATTTGGAAATTTAAATAAGAACCCAACGCGGTTTGTCACTCTACAT\n+ATGGACAATTTGTGGCTTTGTTTTGAACAAAAAATGAATTTTTTTTTACT\n+CAAAATTAACAAGAAGAAACAGTAATGCGGGTTTTGATCGCAATTCTGCT\n+AAATCGAATTTCCGTGCCCAAAGGCTGACCTTTTTGTGCAGCAAATAGCC\n+CAAGCTAATAATCTACAGCAATTTCTTAAAGACCGAAAATGTATCAACTT\n+CGGCAAAATATGAAAATTTAACATTTATTTTTCAACATTTTTTGCCAAAT\n+AATCGACAAAAATTTCAGTGTCCCGATTTGGATGCGAATTAATTGAGGAC\n+AATTGTGACTTTGTTTTGTAAAAAAACTATTTTTTTTACTGAAAATTACA\n+ATCAAGAATCTAAGGCCAAATCAGAATATTTTCACTGCGGTTTTTTGATC\n+GCAATTCTGCTAAATCGGGTTCCGTGCCAAAAAGCTGACTATTTTTTTTA\n+CTGAGAATTAGCATCAAGAATTTATTGCCAAAACAGAACGCAATTCTGCT\n+AAATCGGGTTGCCCTGCCAAAAGGCTGGCCGTTTTGCGCATTCAATAACA\n+CAAGCTAACGGTTTACAGCACATTCGGAAAGATCCAAAAGAATTTCTATT\n+TCGGCATTTTTTATGTATTATCGTCTAATTTTGGTATGTGTATTCATTCT\n+TTTGGCTGTCCGTTATTAAGCTGCTTTTCTTATTCACATATTCAATGGCG\n+CAGTACATAAAAAGAATGAACCGTGGTTTTCTTGCCCTCTCCATCTCCCT\n+CTACCACTTCCCCTCTCAACAACTATCTCACGCACTCTGTCCCTTCTCGT\n+CTCTGTCACCCTCTCTTTACCTACCTCACTCTGGCACACTCTCTCTACCT\n+ATCTCACTCTGTCACCTTCTTCTACCTAATTCTCTCTGTCGCGCTCGCTC\n+TCTCAACATGCCTTTCGCACCGTATCTCTTCGTTTCTCTGTATGCTTGCT\n+CTCTTAATTTAAGTTTCTCTTTCTTTGTGTTCAATATCTTTCGCGCTCGT\n+GACTCCTATTGACCGGACTCTGAGCGGTGCTCATTAAGAGTGGAGTTTTC\n+GATGATGGCGGGTGGCGCGAACGAGAATTTTTTACATAAAGGTAAGTTGC\n+CAATTCTTTTTTAAATGTGACATACAGCATGCATATTTTATTTATTGTAA\n+TTATATGTGAAAGAATAAAATTATATGTACTTTTATCTAATCTATTGCAT\n+CTATTTTTTCACAGGTAAACGAGTCACACGAGAAGTAAAACACAAACACA\n+AGGATCAAAATAAAATTCAGGTGAGTGAACGAACGTGTTGAAAGTAGTTA\n+ATAGTGCATGTCTATTTAAGTGTAAGAAACAAAAAAGACACAAATGGGCC\n+AATAATTCGTTTCTTTAATAGATAACTGCCTATATTATGTTCAAACTATG\n+CTAAAAAGACCCAAAAGGAAAACATAAAGTATATACCTTGCAAAATGAAT\n+AAAAAGAAATCATATATATTCATATACGTTTGACCGGTACCACCGGTCAT\n+ACATACATAAACATAAACACAAAAAGACTAGAATATAAATACCATATTTT\n+TGATTGGCTTAAAAATGGCACAAAAAAAAGATAAAAACTAGAGTGTCTAG\n+CTTTAAGTGACAATCGACTCGGAAAATGATCAAGAAATAAATGAAAAATT\n+TGTCTTGCGGCTTTTTGGTCGCAATTCTAAATCGGTTTGTATGTATGTCA\n+CTCTTCATGTATACAATTTGGTGGCTTTGTATTCCACAAAAGTCACAGTA\n+GTCACAAAAGTTTTTTTTAC
TGAAAATTACCATCAAGAGTCTATGGCCAA\n+ATCAGAATATTTTCACTGCGGTTTTTTATTTCAATTCTGCTAAATCGGGT\n+TTCCGTGCCAAAAAGCTTTTGCAGCCAACAACAATATTCAATTTTTTGGC\n+AAAAAATCGACAAAAAAATT\n'
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/trfBig/workflowInputs/dbia3.fa.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/trfBig/workflowInputs/dbia3.fa.txt Wed Jul 13 13:36:37 2016 -0400
b
b'@@ -0,0 +1,66784 @@\n+>contig1\n+AAAACTAATTTTATCAAAATCGGACAACTATATCATATAGCTGCCATACG\n+AACGATCGGAAAATTGGTAAGTAAATAATTAAAAATATTATATCTTTGGT\n+GTGTTTTTGGAAATAACATTTTTTAATATGTTTTGAATTTTGAATTAAAT\n+TCTATCAAAATCGGACGACTATATCATATAGCTGCCATACAAACGATCGG\n+AAAATTGGTGAATAAATAATATGAAAAAATTATATCTTTGGTTATTTTTT\n+AGACAAATAACCTCCAACGCTTGGAAATAACATTTTTTAATTAGTTATAA\n+ATTTGATATTTTAATTTTATCAAAATCGGACGAATATAGCATATAGCTGT\n+TAAAATAATATGAAACAAATTATAGCTCCGGTGTTTTTTACATATTATCT\n+TATACTATTGGGAAAATAGTTTATTATATTTTAAAGAATTTCCAATTAAA\n+CTCTAACATATAGCTTTCAAAGAAACGGTCAAAAAAGTAAAGAAATCATT\n+TTTTTTTAACATCACTGAAGCTAGAAACAATCCTTAAAAATGTAACATGG\n+TGTTAGTAGCATTGAAAATTGCTTATAACTGCAAAGGGAAAACAAACATC\n+GGCTTGCCGAATGTAATTTCCATTCTTGTTTGACTTGAGTTTATAACTTA\n+CAATTATGGTAAGGTGCCTGATTTTGGTTTTTGCCATACGGTATGTGGTA\n+GTTATCATTTTGCCGTGCTATAAGTGACATCACAGCCGATTCTTTTACTC\n+CATAATGGGCCAATGTGTTGAGGCGTTTCCATCCATTTATTGTTTTTGTA\n+GTTAGGTCTTCATCTTGTAATGTGAGATGACCTCCTCTTCCATGCCTCCA\n+TTCTAAGTCAACTTCGTGCACAGATGGTCTCATAGAAAACGGAGTATTTT\n+TAAAAATTGCGTCCAAAATTTTTAATTTAACTTGCGATATGGTATCCAAA\n+TCGTTTACACGACATTGGACTTTTTCATCAAGATCATCTTGTAAAATGTG\n+CAAAATTACAACAGAATGGGTAACTTGTTCGTGGAGAAGTCGTTCCTCTG\n+AAAGTGAATAACGAGCATCGTGGGTTATTGCGTCCACCAAACCCTTTTCA\n+ATTTGATGCTTAATTGCCTTGAACAGCAAAAATAAATTAGATCCAGCATA\n+TTCTTTTAGGTAGTCGTACATACAAATTGCTAAGTAGTTTGTTAACATTT\n+TTTCAACTACGCTCTCAGTGCGTCGTAGCATTAGCTGAGGATGCTTGCTG\n+GCGAGCGATTTGTCAATTAATCGCAATAAAAGGGACTTTAAAATTTCCGT\n+CGCATATTCCATTTTGTTCATTAGAACAACCATAAGTAAAGAGGCGACGT\n+TAACTCGATCGCGAATTGAGAAAGATGACCGTTGAGCTTCTAAAGTTTCT\n+ATGAACAATAGTAAAAAATATTTGTTTCCAATAAGTTGCTCGAATTGTAT\n+CATAGCTGCATCATAGTTAGTGTGCGGGCTACTTCCACAAAATTTTCGGG\n+AGTTTAGAATAGGATGATCTGATACACCGGGAAAGAAAACTTTCATAATG\n+TAATTGACGTGATCTAACGTTGGTATACCGGTGCTCTCCAAATCTGCTGT\n+TAGATCGGTCATGTCCGTTTGGAGCTCAGCAAATGCCTGTTTACACTCAG\n+AACGAACGTTGCTTTCCAATGTTATCATCTGTATCTGAATTCGTTTGTAT\n+TCCCGTTCTGCTTGGGTCGATTTCCTTCTAAATATTATTAGTACGACAAC\n+CAAAACGATGACAAGCACTGCAACTGTCAATATAACGACAAACATGGCAT\n+GTGAAAAAACATAGGGTTTATTTAAATCATATTTCAAATATCCTATGGCG\n+AAAC
GAAGATTTCGCCCTACTTTAACCACAACTAGAGGTAAATCTGTCGA\n+TTGATCCACACCATTTTCATCAGTTGGGAGTGGTTGATGTTCCGGTGGAA\n+TGCACAAAAGTTGAGTTAGTGTAAGGCTTGTTATATTGCATTGAGTAGTA\n+CCAATGGTTACATTAACGTCGTATTCATCAGCTGCCAAATTTAGTAGCTC\n+GCCTTCAATAACCAAGCTGTCACCCTTGTATAGTTTAATTCCGTCATTTG\n+GAAATGGTAAATATTTTGGATCGGCTAAATAAACAATAGTGCTCCTGATA\n+TCATGAAAGTATTTATTTAGATCTCGAACAAGCTGTACATTGTCCATTAC\n+AAAACTAAGTTGTAAATTCAGTTGCGTTTCATGGACCTTTACAAAAGTTG\n+TAACATCCATATTATTATTGACGAAATAGCTAGGCGCAGTTCCCCCTGTT\n+GTGTATATATGAAAGTTGTCTACGAAGTCTGCTTTTCGCTTTCGTCTAGA\n+ATATTCGTTCTGATGCGCACTTTCATTTTTAAAGTCAAATTCCGAATCCA\n+TCACTCTGTTAGAACTTTTAAATGCTTGAAATTTATCATTTACCGGAGGA\n+GATGGGCATTCCATTTGATTTGAATTGATTACTACGCAAGATGTTTTGTT\n+TACTCTTTCATTATCAAAAAAAACTTCAAGTTCAGGCTTTTGAATTGAAT\n+TAAGATACATTCCATGAACGGTCAGAACGCGACCACCACTTACAAAACTG\n+CGTAAAGGCTTAATCTGCATTATACGCGGGTCTTGGGTATAGTTAAAAAT\n+AGAACAGGGCTGCCTTGGTAACGTACGAAATTGGTATGAACCGAAATTAC\n+TTCGAGTTAGATTTTTATTGGGTATAGTTGCGGGTGAGATATGACATTCT\n+AAAGTTCGGTTGGCACCGTCAATAAGAAGGTGTAGAGACCGTATTGGTTC\n+TGGCTGGGTCGCCTCTGATGTAGTACTACTAACTTGAGTTGAAGAAGCTT\n+GTGTAACGTTTATATGACACTCGTACTCATCTAAATATGCTCGCATGGTT\n+GATCCAATGTTCAAAAACTTTCCTATTAACGACAATTGTGTTCCTCCCGA\n+CCTGGGCCCAATCGTGGGATATAATCCTGTTAGCAAAACATTCTTAAAGT\n+GAAATTGTACACTAGATTCAGTATACCCTGCATCATTTGCAACCTTTATA\n+GGCGCTGACATTTCATACATCACTGCTCCAGTGCGGCATTCGATCTTTAC\n+AGAAATTTGGTAGTTGACTAGTTCACAAGGCACAGACCCAATAAATATTT\n+TTCCACGCACATCTCCTTCGCGAATGCCCAAATTACTGCCTTCAATTGTA\n+ATAAGGGTACCACCCTCAATAGGTCCAGATAACGGTTTAATAATATCGAT\n+CCTTGGTAAAGGGCACTCGTTTTCTTTAGTTGTTTTTGATCCTGCACTAA\n+TAGCACGATCACTGTCTGCTATGCAAGTCTCGTTATATACACATGAGTTG\n+CTGCACCAAGCACATTGGTATTTTGGATCACGAGTGACACACAAACTGCA\n+ATCAGCATGATCCCGATGTGAGCCCAGTACGTCGCATTTATATAATGTAA\n+CAATCGCCGTGTCCACATAGTGCTGGAAATTCCATGTAATTACAACCTTT\n+GCTTGGTATTCATGTGTATTAGTCTCGTAGAAATAAGGTGTTTTTTCACA\n+AACAACAATCTTGTTTGACTCAATGTGGGCAGGCAATAGCATCTGAGCAG\n+CTTCAATATGAACTGTACACAAGAATCCAGCGTGAGCGCTTTTGGGTTTT\n+GGTAAGTTTTCTATCTCTAAACGAATTTCTTTTGGCACCCGCACCGGTAA\n+AAGAATCGCCGGACGATTGCTTTTTAAATGGGGGCATTGACCA
ACACTAG\n+TTACCGAATTTTCTATATTACGACATTGTATTGATTGATGGACACATTTA\n+TTGTCAAAAATACACC'..b'CCACTGGACCCGTTGGATAGCGTC\n+GGCACTTTTCTGTTTGGTGCCATTCACTGGTCACTAGTAGGACCGCTCTG\n+CCTATTAAAATAGGAGTGCTGCTTAGAATTTATTTCAGCAAGACAGCCGT\n+AAGATATATATGAATAGTATTTAACGGCAGTGGCAGAGGCCTTTACAATA\n+TAAGGCTTTACGGGATTTTAAACAATGAAAAACAAAAATATTTTTATAGA\n+GTCGAGATAAAATACTTTTTACAATAAATACTAAGTATTTTTAAAAGATA\n+AAAATATTTAAAAGTAAATACTGAATATTTTAACAGGTGTTAAGTACTGG\n+GAACTGTGACTAAGGACGGCGGTATATCGCTACCATAATATTTTTAAAGG\n+AACTGAATACTGAAAGATGTGACCAAAATGGTAGAATATCGCTAACGAAA\n+ATCACAGACGCAAATATCGATAGTGGCCTAGAGACCCGATATCGCCGCCG\n+TGATTATCGATGAAACTACAGCTGAGCTGCTTGTTTACATTCTAAAGTTA\n+AAATTTTTAAAAATTTCCAATACAAAAAAAAATTCTTCGTATTAACACGG\n+CTGTTTGCCGGTAAGATCTGAAGTGGATTCAAGTTGCAGCACCCTCAGGA\n+AAAGAATAAATTATTATAATATAATAATATAATATAAAGAATTTAAATTC\n+GAACCAAACGCGGTATGTCACTCTTCATTTAGAGAATTTGTGGCTTTGTT\n+TTGTACAAAAAACTATTCTTTTTTCTTGAAAATTAGCATCAAGAATTTAT\n+TGCCAAAACAGAACGCAATTCTGCTAAATCTGGTTACCCTGCCAAATGGC\n+TGGCCGTTCTGTGCATTCAATAACACAAGCTAACGGCTTACGGCACATTC\n+GGAAAGATCCAAAAGAATTTCTATTCCGGCATTTTTTATGTATTATCGTC\n+CAATTTTTGTATGTGTATTCATTCTTTTGGCTGTCCGTTATTAAGCTTTT\n+CTTATCCACATAAGCTGACCAGCTGCCAAAAAGCCGACCGTTTTGTGCAG\n+CCAATAACCCAAGCTAACTATCAACAGCAATTTCGAAAAATCCAGAAGAA\n+GCTATCCTTCGGCAATTTTTTCTTATATTATAATTTTTTTGTCACAATTT\n+TTTGTCAAAAAATCGACAATAATTAAGTGTTTACATTTTAATGCGATTTA\n+ATTGGAAATTTGAATTCGAATCAAACGCGGTGTCACTCTACATATGGACA\n+ATTTGTGGCTACAAAATTAGTTGAAAGTGCATGTTTTTTTTTAAGTGAAA\n+TAAAACAAAAACAGCAATAACAGGCCAAAATGTTGTTTTATAAATAAATA\n+CTTACATATCCGCATATAAATATGAATGAAAAAAAAAACAGCAAGACTGG\n+GCCAAAATATTGTTCTTTAATATGTAAATAAATACATATCTACATATACA\n+TATGTGGACAAAAGTGTTAAAATTAGTTAATAGTGCATGTTTTTTAAATG\n+AAAAAATAAAACAAAAAAAGCAAGCAAATATAAATTATATTCCACACACA\n+TTTACAATAATTTCAAACTACGCTAAAAATACCCAACAGAAAAAACATAG\n+AAAGGTAAACAAAATACAAGAATATCAAAAAAAATCTAATTTTCACGTTG\n+TTGTGGCCGGTACCACCGGTCATACATACATATAGATACATATTTAGATT\n+CTCACAAAAAGAATAGAAAATTACCGCTAATTTCCTTATATACGTTTTTG\n+TTTTCAGTGTTTTGAAATAGTATTTTTCAAACAAAAAACAATACAAAAGG\n+GTTAAAAAAATTGTTCGATTGGCTTTAACATACT
AATTAAAAAAAATCAC\n+AATTTTTTGGCAAAATATCGAAAAAAATTAAGTGTTTACATTTTTATGCG\n+AATTATTTGGAAATTTAAATAAGAACCCAACGCGGTTTGTCACTCTACAT\n+ATGGACAATTTGTGGCTTTGTTTTGAACAAAAAATGAATTTTTTTTTACT\n+CAAAATTAACAAGAAGAAACAGTAATGCGGGTTTTGATCGCAATTCTGCT\n+AAATCGAATTTCCGTGCCCAAAGGCTGACCTTTTTGTGCAGCAAATAGCC\n+CAAGCTAATAATCTACAGCAATTTCTTAAAGACCGAAAATGTATCAACTT\n+CGGCAAAATATGAAAATTTAACATTTATTTTTCAACATTTTTTGCCAAAT\n+AATCGACAAAAATTTCAGTGTCCCGATTTGGATGCGAATTAATTGAGGAC\n+AATTGTGACTTTGTTTTGTAAAAAAACTATTTTTTTTACTGAAAATTACA\n+ATCAAGAATCTAAGGCCAAATCAGAATATTTTCACTGCGGTTTTTTGATC\n+GCAATTCTGCTAAATCGGGTTCCGTGCCAAAAAGCTGACTATTTTTTTTA\n+CTGAGAATTAGCATCAAGAATTTATTGCCAAAACAGAACGCAATTCTGCT\n+AAATCGGGTTGCCCTGCCAAAAGGCTGGCCGTTTTGCGCATTCAATAACA\n+CAAGCTAACGGTTTACAGCACATTCGGAAAGATCCAAAAGAATTTCTATT\n+TCGGCATTTTTTATGTATTATCGTCTAATTTTGGTATGTGTATTCATTCT\n+TTTGGCTGTCCGTTATTAAGCTGCTTTTCTTATTCACATATTCAATGGCG\n+CAGTACATAAAAAGAATGAACCGTGGTTTTCTTGCCCTCTCCATCTCCCT\n+CTACCACTTCCCCTCTCAACAACTATCTCACGCACTCTGTCCCTTCTCGT\n+CTCTGTCACCCTCTCTTTACCTACCTCACTCTGGCACACTCTCTCTACCT\n+ATCTCACTCTGTCACCTTCTTCTACCTAATTCTCTCTGTCGCGCTCGCTC\n+TCTCAACATGCCTTTCGCACCGTATCTCTTCGTTTCTCTGTATGCTTGCT\n+CTCTTAATTTAAGTTTCTCTTTCTTTGTGTTCAATATCTTTCGCGCTCGT\n+GACTCCTATTGACCGGACTCTGAGCGGTGCTCATTAAGAGTGGAGTTTTC\n+GATGATGGCGGGTGGCGCGAACGAGAATTTTTTACATAAAGGTAAGTTGC\n+CAATTCTTTTTTAAATGTGACATACAGCATGCATATTTTATTTATTGTAA\n+TTATATGTGAAAGAATAAAATTATATGTACTTTTATCTAATCTATTGCAT\n+CTATTTTTTCACAGGTAAACGAGTCACACGAGAAGTAAAACACAAACACA\n+AGGATCAAAATAAAATTCAGGTGAGTGAACGAACGTGTTGAAAGTAGTTA\n+ATAGTGCATGTCTATTTAAGTGTAAGAAACAAAAAAGACACAAATGGGCC\n+AATAATTCGTTTCTTTAATAGATAACTGCCTATATTATGTTCAAACTATG\n+CTAAAAAGACCCAAAAGGAAAACATAAAGTATATACCTTGCAAAATGAAT\n+AAAAAGAAATCATATATATTCATATACGTTTGACCGGTACCACCGGTCAT\n+ACATACATAAACATAAACACAAAAAGACTAGAATATAAATACCATATTTT\n+TGATTGGCTTAAAAATGGCACAAAAAAAAGATAAAAACTAGAGTGTCTAG\n+CTTTAAGTGACAATCGACTCGGAAAATGATCAAGAAATAAATGAAAAATT\n+TGTCTTGCGGCTTTTTGGTCGCAATTCTAAATCGGTTTGTATGTATGTCA\n+CTCTTCATGTATACAATTTGGTGGCTTTGTATTCCACAAAAGTCACAGTA\n+GTCACAAAAGTTTTTTTTAC
TGAAAATTACCATCAAGAGTCTATGGCCAA\n+ATCAGAATATTTTCACTGCGGTTTTTTATTTCAATTCTGCTAAATCGGGT\n+TTCCGTGCCAAAAAGCTTTTGCAGCCAACAACAATATTCAATTTTTTGGC\n+AAAAAATCGACAAAAAAATT\n'
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/trfBig/workflowInputs/dbia3_trfBig_unsorted.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/trfBig/workflowInputs/dbia3_trfBig_unsorted.bed Wed Jul 13 13:36:37 2016 -0400
b
b'@@ -0,0 +1,602 @@\n+contig1\t9130\t9428\ttrf\t163\t1.8\t164\t86\t2\t438\t36\t12\t13\t37\t1.82\tAAAAAAAATTATATCTTCGGTGTTTTTCAACATACAACCTCCTAAGCTTGGAAATAACATTTCTTAATCAGTTCTGAATTTCGAATTAAATTTTTATCAAAATCGGACAACTATACCATATAGCTGTCATAGGAAGGATTGGATAATTAGTGGTAAAATAATAT\n+contig1\t15707\t15757\ttrf\t20\t2.5\t19\t83\t9\t55\t32\t12\t2\t54\t1.49\tTATATCATTTATATCTCAG\n+contig1\t16261\t16302\ttrf\t12\t3.4\t12\t89\t0\t64\t2\t36\t24\t36\t1.69\tTCCGTCTGTCCG\n+contig1\t23387\t23483\ttrf\t45\t2.2\t45\t92\t1\t158\t19\t22\t30\t27\t1.98\tTCGGCGAAGAAATGTGCCACTTCGGCGGCACTTCTTGGAGTCACT\n+contig1\t23451\t23520\ttrf\t24\t2.9\t24\t80\t0\t75\t15\t26\t27\t30\t1.96\tTTCGGCGACACTTCTTGAAGTCAG\n+contig1\t24200\t24233\ttrf\t15\t2.2\t15\t94\t0\t57\t51\t12\t15\t21\t1.75\tAAACAATGGAATGCT\n+contig1\t29159\t29628\ttrf\t162\t2.9\t160\t80\t8\t518\t37\t12\t11\t38\t1.80\tGGAAAACATGAAATAAAAATTATATCTTTCGTGTTTTTTAACATATACCTTCTAAGCTTGAAAATAACATTTTTTATTTGTTCTGAATTTCGAATTAAATTTTATCAAAATCGGACGACTATATCATATAGCTGTCATAGGAACAATCGGAAAATAAGTA\n+contig1\t37571\t37606\ttrf\t8\t4.4\t8\t88\t0\t52\t31\t25\t42\t0\t1.55\tGACGGACA\n+contig1\t38436\t38491\ttrf\t8\t6.9\t8\t79\t8\t58\t0\t38\t27\t34\t1.57\tTGTCCGTC\n+contig1\t38436\t38491\ttrf\t12\t4.6\t12\t81\t0\t65\t0\t38\t27\t34\t1.57\tTGTCCGTCCGTC\n+contig1\t38436\t38491\ttrf\t20\t3.1\t18\t80\t17\t69\t0\t38\t27\t34\t1.57\tTGTCCGTCTGTCCGTCCG\n+contig1\t43116\t43168\ttrf\t2\t26.0\t2\t96\t0\t95\t48\t0\t1\t50\t1.12\tTA\n+contig2\t540\t590\ttrf\t20\t2.5\t19\t83\t9\t55\t32\t12\t2\t54\t1.49\tTATATCATTTATATCTCAG\n+contig2\t1094\t1135\ttrf\t12\t3.4\t12\t89\t0\t64\t2\t36\t24\t36\t1.69\tTCCGTCTGTCCG\n+contig2\t8220\t8316\ttrf\t45\t2.2\t45\t92\t1\t158\t19\t22\t30\t27\t1.98\tTCGGCGAAGAAATGTGCCACTTCGGCGGCACTTCTTGGAGTCACT\n+contig2\t8284\t8353\ttrf\t24\t2.9\t24\t80\t0\t75\t15\t26\t27\t30\t1.96\tTTCGGCGACACTTCTTGAAGTCAG\n+contig2\t9033\t9066\ttrf\t15\t2.2\t15\t94\t0\t57\t51\t12\t15\t21\t1.75\tAAACAATGGAATGCT\n+contig2\t13992\t14461\ttrf\t162\t2.9\t160\t80\t8\t518\t37\t12\t11\t38\t1.80\tGGAAAACATG
AAATAAAAATTATATCTTTCGTGTTTTTTAACATATACCTTCTAAGCTTGAAAATAACATTTTTTATTTGTTCTGAATTTCGAATTAAATTTTATCAAAATCGGACGACTATATCATATAGCTGTCATAGGAACAATCGGAAAATAAGTA\n+contig2\t22404\t22439\ttrf\t8\t4.4\t8\t88\t0\t52\t31\t25\t42\t0\t1.55\tGACGGACA\n+contig2\t23269\t23324\ttrf\t8\t6.9\t8\t79\t8\t58\t0\t38\t27\t34\t1.57\tTGTCCGTC\n+contig2\t23269\t23324\ttrf\t12\t4.6\t12\t81\t0\t65\t0\t38\t27\t34\t1.57\tTGTCCGTCCGTC\n+contig2\t23269\t23324\ttrf\t20\t3.1\t18\t80\t17\t69\t0\t38\t27\t34\t1.57\tTGTCCGTCTGTCCGTCCG\n+contig2\t27949\t28001\ttrf\t2\t26.0\t2\t96\t0\t95\t48\t0\t1\t50\t1.12\tTA\n+contig2\t38280\t38311\ttrf\t16\t1.9\t16\t93\t0\t53\t41\t19\t6\t32\t1.77\tTACATACATACATATG\n+contig3\t3265\t3296\ttrf\t16\t1.9\t16\t93\t0\t53\t41\t19\t6\t32\t1.77\tTACATACATACATATG\n+contig3\t17933\t17958\ttrf\t10\t2.5\t10\t100\t0\t50\t60\t8\t0\t32\t1.26\tATATAAACAT\n+contig3\t19067\t19124\ttrf\t28\t2.0\t28\t100\t0\t114\t43\t14\t10\t31\t1.79\tAATTAAATTTTATCAAAATCGGACGACT\n+contig3\t23368\t23418\ttrf\t2\t25.0\t2\t100\t0\t100\t50\t0\t0\t50\t1.00\tAT\n+contig3\t24889\t25322\ttrf\t160\t2.7\t161\t80\t6\t452\t36\t12\t12\t38\t1.82\tAATATTAAAAATTATATCTTTGGTGTTTTTTAACATATAACCTTCTAAGCTTGGAAATAACATTTTTTATTTGGTTTAATTTCGAATTAAATTTTATTAAAATCGGACGACCATATCATACAGCTCCCATAGAAACAATCGGAAAATTAGTCGGAAACATG\n+contig3\t26816\t26900\ttrf\t18\t5.0\t16\t83\t13\t89\t20\t5\t0\t73\t1.03\tTTTTTTTATATATTTT\n+contig3\t26816\t26887\ttrf\t18\t4.1\t17\t84\t12\t90\t19\t4\t0\t76\t0.96\tTTTTTTTATATATTTTT\n+contig3\t26816\t26900\ttrf\t15\t5.2\t15\t79\t16\t71\t20\t5\t0\t73\t1.03\tTTTTTTTATATATAT\n+contig3\t26820\t26887\ttrf\t17\t3.7\t19\t80\t20\t74\t20\t4\t0\t74\t0.99\tTTTATATATTTTTTTTTCA\n+contig3\t29468\t29515\ttrf\t25\t2.0\t23\t84\t12\t60\t34\t0\t17\t48\t1.47\tATTATAATTATGATGTTATGATG\n+contig3\t38756\t38791\ttrf\t10\t3.6\t10\t92\t7\t63\t40\t0\t0\t60\t0.97\tATTTATTTAA\n+contig3\t38758\t38788\ttrf\t9\t3.2\t9\t95\t4\t51\t40\t0\t0\t60\t0.97\tTTATTTAAA\n+contig4\t7931\t7956\ttrf\t10\t2.5\t10\t100\t0\t50\t60\t8\t0\t32\t1.26\tATATAAACAT\n+contig4\
t9065\t9122\ttrf\t28\t2.0\t28\t100\t0\t114\t43\t14\t10\t31\t1.79\tAATTAAATTTTATCAAAATCGGACGACT\n+contig4\t13366\t13416\ttrf\t2\t25.0\t2\t100\t0\t100\t50\t0\t0\t50\t1.00\tAT\n+contig4\t14887\t15320\ttrf\t160\t2.7\t161\t80\t6\t452\t36\t12\t12\t38\t1.82\tAATATTAAAAATTATATCTTTGGTGTTTTTTAACATATAACCTTCTAAGCTTGGAAATAACATTTTTTATTTGGTTTAATTTCGAATTAAATTTTATTAAAATCGGACGACCATATCATACAGCTCCCATAGAAACAATCGGAAAATTAGTCGGAAACATG\n+contig4\t16814\t16898\ttrf\t18\t5.0\t16\t83\t13\t89\t20\t5\t0\t73\t1.03\tTTTTTTTATATATTTT\n+contig4\t16814\t16885\ttrf\t18\t4.1\t17\t84\t12\t90\t19\t4\t0\t76\t0.96\tTTTTTTTATATATTTTT\n+contig4\t16814\t16898\ttrf\t15\t5.2\t15\t79\t16\t71\t20\t5\t0\t73\t1.03\tTTTTTTTATATATAT'..b'\n+contig69\t18503\t18850\ttrf\t163\t2.1\t163\t88\t3\t502\t36\t12\t16\t35\t1.86\tCTTTGGTGCTATTTGACATATAACCTCCTAAGCTTGGAAATATCATTTTTTAATTGATTTTGAAATTCAAATTAAATTTGATCAAAATCGGACGACTATATCATATAGCTGTCATAGGAACGATCGGAAAATTGGTGGAAAAATAATATGAAACAAATTATAG\n+contig69\t18536\t18888\ttrf\t163\t2.2\t161\t85\t4\t472\t36\t9\t16\t37\t1.82\tTTGGAAATACAATTTTTTATATTTATGAAATTCAAATTAAATTTGATCAAAATCGGACGACTATATCATATAGCTGTCATAGGAACGATCGGAAAATTGGTGGAAAAATAATATGAAACAAATTATAGCTTTGGAGCTGTTTGACATATAACCCTATAAGA\n+contig69\t19781\t19812\ttrf\t15\t2.1\t14\t94\t5\t53\t58\t0\t0\t41\t0.98\tATATAATTATAATA\n+contig69\t23637\t24060\ttrf\t162\t2.7\t156\t83\t6\t496\t37\t11\t12\t38\t1.79\tAAAAAAATTATATCTTCGGTGCTTTTTAACATATAACCTTCTAAGCTTGGAAATAACATTTTTTAATTAGTTCTGAATTTCAATTAAATTTTATCAAAATCGGACGACTTATAGCTGCCATAGGAACGATCGAAAAATTGATGGGAAAATAAATAT\n+contig69\t36863\t36892\ttrf\t15\t1.9\t15\t100\t0\t58\t27\t20\t48\t3\t1.66\tGGGGAACGCGAGCAT\n+contig69\t38645\t39124\ttrf\t158\t3.0\t157\t85\t5\t615\t34\t13\t13\t38\t1.84\tTAAAAATTGTTATTTCCAAGCTTAGAAGGTTATATGTTAAAAAACACCAAGATATAATTTTTTCATATTTTCCGACTATTTTTCCGATCGTTTCTATGGCAGCTATATGATATAGTCGTCCGATTTTGATAAAATTTAATTTGAAATTAAAACCAAT\n+contig69\t43653\t44084\ttrf\t162\t2.7\t162\t87\t4\t586\t40\t10\t13\t35\t1.79\tAACATTTTTTAATTAGTTCTGAATTTCGAATTAAATTTTATCAAAATCGGAAGACTATATCATATAGCTGTCATAGGAACGA
TCGAAAAATTGGTGGAAAATAATATAATAAAAATTATATCCTTGGTATTTTTTAACATATAACAATATAAGCTAGAAAAG\n+contig70\t6910\t6939\ttrf\t15\t1.9\t15\t100\t0\t58\t27\t20\t48\t3\t1.66\tGGGGAACGCGAGCAT\n+contig70\t8692\t9171\ttrf\t158\t3.0\t157\t85\t5\t615\t34\t13\t13\t38\t1.84\tTAAAAATTGTTATTTCCAAGCTTAGAAGGTTATATGTTAAAAAACACCAAGATATAATTTTTTCATATTTTCCGACTATTTTTCCGATCGTTTCTATGGCAGCTATATGATATAGTCGTCCGATTTTGATAAAATTTAATTTGAAATTAAAACCAAT\n+contig70\t13700\t14131\ttrf\t162\t2.7\t162\t87\t4\t586\t40\t10\t13\t35\t1.79\tAACATTTTTTAATTAGTTCTGAATTTCGAATTAAATTTTATCAAAATCGGAAGACTATATCATATAGCTGTCATAGGAACGATCGAAAAATTGGTGGAAAATAATATAATAAAAATTATATCCTTGGTATTTTTTAACATATAACAATATAAGCTAGAAAAG\n+contig70\t17962\t18295\ttrf\t161\t2.1\t160\t83\t5\t413\t37\t12\t13\t35\t1.84\tAAAAAATTATATCTCTGGTGTTTTTAAACATATAACCTCCTAAACTTGGAAATAACATTTTATAATTAGTTCTGAATTTCGAATTAAATTTTATCAAAACCCGACGACTATATCATACATGTAACTGTAACGATCGGAAAATTGATGGGAAAATAATATG\n+contig70\t19030\t19306\ttrf\t137\t2.0\t137\t94\t1\t482\t39\t13\t13\t34\t1.83\tTTTAACACATACCTTTCTAAGCTTGGATATAACATTTTTAAACTGGTTCTGAATTTCAAATTAAATTCAATTAAAATCGGACGACTATATCATATAGCTCCCATAGGAAAAATCGGAAAATTAGTGAGAAAATAATA\n+contig70\t22269\t22308\ttrf\t19\t2.1\t19\t100\t0\t78\t58\t15\t15\t10\t1.62\tAACTAAGGAAATACCAGAA\n+contig70\t22812\t23252\ttrf\t162\t2.8\t156\t84\t5\t562\t37\t11\t13\t37\t1.81\tTTCTAAGCTTGAAATAACATTTTTTAATTAGTTCTGAATTTCGAATTTAATTTTATTAAAATCGGACGACTATATCATATAGCTGCCATAGGAACGATCGGAAAATTAGTGGAAAATAATAAATAAAAATTATATTTGTATTTTTAACATATAACC\n+contig70\t23421\t23486\ttrf\t13\t4.7\t13\t70\t20\t51\t38\t0\t0\t61\t0.96\tATATATATAATTT\n+contig70\t23426\t23479\ttrf\t7\t7.4\t7\t79\t12\t54\t35\t0\t0\t64\t0.94\tTATATTT\n+contig70\t23430\t23486\ttrf\t27\t2.0\t27\t83\t10\t69\t35\t0\t0\t64\t0.94\tATTTATATATTTATTAATATATTTTAT\n+contig70\t25576\t25606\ttrf\t12\t2.5\t12\t94\t0\t51\t33\t26\t36\t3\t1.73\tACGGACGGACAG\n+contig70\t27263\t27382\ttrf\t63\t1.9\t63\t85\t3\t168\t27\t25\t21\t26\t1.99\tATTTTTCCAAGCACTTTCCTGTACAAGGGAAACGTCCCAGGGAAAGCCTATCGGAATTTCAAA\n+contig70\t27353\t27406\ttrf\t25\t2.1\t25\t85\t
0\t70\t30\t26\t32\t11\t1.91\tGGAAACGTCCCAGGGAAAGCCCATC\n+contig70\t27848\t27878\ttrf\t12\t2.5\t12\t88\t0\t51\t30\t30\t40\t0\t1.57\tACGGACGGACAG\n+contig70\t35504\t35713\ttrf\t102\t2.0\t102\t99\t0\t409\t37\t12\t13\t36\t1.82\tAAAACGATGGTAGACAAATATGCATATATTTTTTACACAAAACGAAATATAATGGACTTTTAAAAATTCTTTTTTCTATCTTTCCTGGTGGGAGATATATAT\n+contig70\t43333\t43446\ttrf\t57\t2.0\t57\t94\t0\t199\t30\t17\t22\t29\t1.97\tAATATTGGGAATAACATATTATCTTATAATATGGGAGCGCGAAGGCTCCTCGCCCAT\n+contig70\t44242\t44281\ttrf\t7\t5.7\t7\t81\t6\t53\t28\t20\t12\t38\t1.89\tTACATGT\n+contig70\t44241\t44281\ttrf\t13\t2.9\t14\t85\t7\t55\t27\t22\t12\t37\t1.90\tCTACATGTTACATG\n+contig70\t44485\t44531\ttrf\t14\t3.3\t14\t84\t0\t65\t41\t21\t10\t26\t1.86\tAACCGTATATGACT\n+contig70\t45147\t45181\ttrf\t16\t2.0\t17\t88\t11\t52\t52\t0\t2\t44\t1.16\tAAATATTAGTAATATAT\n+contig70\t46706\t46990\ttrf\t147\t1.9\t147\t100\t0\t568\t39\t10\t9\t40\t1.72\tTTCTTTATTTTTTTTATTTTAAAATACTTAGTACTTAGTAATGTCGCTAAAACCAATATAATATTCTTTAAAATTTAGAAAATATATTCAGACTTCGAATTACAAGGTGTATTAGAAGATAAATTAAACTCTAAAGCTTAATTTATC\n+contig70\t53900\t53978\ttrf\t28\t2.8\t28\t80\t5\t97\t15\t43\t5\t35\t1.69\tCTCTGTCACCCTCTCTTTACCTACCTCA\n'
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a test-data/trfBig/workflowInputs/dbia3_trfBig_unsorted_withMaskedGenome.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/trfBig/workflowInputs/dbia3_trfBig_unsorted_withMaskedGenome.fa Wed Jul 13 13:36:37 2016 -0400
b
b'@@ -0,0 +1,66784 @@\n+>contig1\n+aaaactaattttatcaaaatcggacaactatatcatatagctgccatacg\n+aacgatcggaaaattggtaagtaaataattaaaaatattatatctttggt\n+gtgtttttggaaataacattttttaatatgttttgaattttgaattaaat\n+tctatcaaaatcggacgactatatcatatagctgccatacaaacgatcgg\n+aaaattggtgaataaataatatgaaaaaattatatctttggttatttttt\n+agacaaataacctccaacgcttggaaataacattttttaattagttataa\n+atttgatattttaattttatcaaaatcggacgaatatagcatatagctgt\n+taaaataatatgaaacaaattatagctccggtgttttttacatattatct\n+tatactattgggaaaatagtttattatattttaaagaatttccaattaaa\n+ctctaacatatagctttcaaagaaacggtcaaaaaagtaaagaaatcatt\n+tttttttaacatcactgaagctagaaacaatccttaaaaatgtaacatgg\n+tgttagtagcattgaaaattgcttataactgcaaagggaaaacaaacatc\n+ggcttgccgaatgtaatttccattcttgtttgacttgagtttataactta\n+caattatggtaaggtgcctgattttggtttttgccatacggtatgtggta\n+gttatcattttgccgtgctataagtgacatcacagccgattcttttactc\n+cataatgggccaatgtgttgaggcgtttccatccatttattgtttttgta\n+gttaggtcttcatcttgtaatgtgagatgacctcctcttccatgcctcca\n+ttctaagtcaacttcgtgcacagatggtctcatagaaaacggagtatttt\n+taaaaattgcgtccaaaatttttaatttaacttgcgatatggtatccaaa\n+tcgtttacacgacattggactttttcatcaagatcatcttgtaaaatgtg\n+caaaattacaacagaatgggtaacttgttcgtggagaagtcgttcctctg\n+aaagtgaataacgagcatcgtgggttattgcgtccaccaaacccttttca\n+atttgatgcttaattgccttgaacagcaaaaataaattagatccagcata\n+ttcttttaggtagtcgtacatacaaattgctaagtagtttgttaacattt\n+tttcaactacgctctcagtgcgtcgtagcattagctgaggatgcttgctg\n+gcgagcgatttgtcaattaatcgcaataaaagggactttaaaatttccgt\n+cgcatattccattttgttcattagaacaaccataagtaaagaggcgacgt\n+taactcgatcgcgaattgagaaagatgaccgttgagcttctaaagtttct\n+atgaacaatagtaaaaaatatttgtttccaataagttgctcgaattgtat\n+catagctgcatcatagttagtgtgcgggctacttccacaaaattttcggg\n+agtttagaataggatgatctgatacaccgggaaagaaaactttcataatg\n+taattgacgtgatctaacgttggtataccggtgctctccaaatctgctgt\n+tagatcggtcatgtccgtttggagctcagcaaatgcctgtttacactcag\n+aacgaacgttgctttccaatgttatcatctgtatctgaattcgtttgtat\n+tcccgttctgcttgggtcgatttccttctaaatattattagtacgacaac\n+caaaacgatgacaagcactgcaactgtcaatataacgacaaacatggcat\n+gtgaaaaaacatagggtttatttaaatcatatttcaaatatcctatggcg\n+aaac
gaagatttcgccctactttaaccacaactagaggtaaatctgtcga\n+ttgatccacaccattttcatcagttgggagtggttgatgttccggtggaa\n+tgcacaaaagttgagttagtgtaaggcttgttatattgcattgagtagta\n+ccaatggttacattaacgtcgtattcatcagctgccaaatttagtagctc\n+gccttcaataaccaagctgtcacccttgtatagtttaattccgtcatttg\n+gaaatggtaaatattttggatcggctaaataaacaatagtgctcctgata\n+tcatgaaagtatttatttagatctcgaacaagctgtacattgtccattac\n+aaaactaagttgtaaattcagttgcgtttcatggacctttacaaaagttg\n+taacatccatattattattgacgaaatagctaggcgcagttccccctgtt\n+gtgtatatatgaaagttgtctacgaagtctgcttttcgctttcgtctaga\n+atattcgttctgatgcgcactttcatttttaaagtcaaattccgaatcca\n+tcactctgttagaacttttaaatgcttgaaatttatcatttaccggagga\n+gatgggcattccatttgatttgaattgattactacgcaagatgttttgtt\n+tactctttcattatcaaaaaaaacttcaagttcaggcttttgaattgaat\n+taagatacattccatgaacggtcagaacgcgaccaccacttacaaaactg\n+cgtaaaggcttaatctgcattatacgcgggtcttgggtatagttaaaaat\n+agaacagggctgccttggtaacgtacgaaattggtatgaaccgaaattac\n+ttcgagttagatttttattgggtatagttgcgggtgagatatgacattct\n+aaagttcggttggcaccgtcaataagaaggtgtagagaccgtattggttc\n+tggctgggtcgcctctgatgtagtactactaacttgagttgaagaagctt\n+gtgtaacgtttatatgacactcgtactcatctaaatatgctcgcatggtt\n+gatccaatgttcaaaaactttcctattaacgacaattgtgttcctcccga\n+cctgggcccaatcgtgggatataatcctgttagcaaaacattcttaaagt\n+gaaattgtacactagattcagtataccctgcatcatttgcaacctttata\n+ggcgctgacatttcatacatcactgctccagtgcggcattcgatctttac\n+agaaatttggtagttgactagttcacaaggcacagacccaataaatattt\n+ttccacgcacatctccttcgcgaatgcccaaattactgccttcaattgta\n+ataagggtaccaccctcaataggtccagataacggtttaataatatcgat\n+ccttggtaaagggcactcgttttctttagttgtttttgatcctgcactaa\n+tagcacgatcactgtctgctatgcaagtctcgttatatacacatgagttg\n+ctgcaccaagcacattggtattttggatcacgagtgacacacaaactgca\n+atcagcatgatcccgatgtgagcccagtacgtcgcatttatataatgtaa\n+caatcgccgtgtccacatagtgctggaaattccatgtaattacaaccttt\n+gcttggtattcatgtgtattagtctcgtagaaataaggtgttttttcaca\n+aacaacaatcttgtttgactcaatgtgggcaggcaatagcatctgagcag\n+cttcaatatgaactgtacacaagaatccagcgtgagcgcttttgggtttt\n+ggtaagttttctatctctaaacgaatttcttttggcacccgcaccggtaa\n+aagaatcgccggacgattgctttttaaatgggggcattgacca
acactag\n+ttaccgaattttctatattacgacattgtattgattgatggacacattta\n+ttgtcaaaaatacacc'..b'ccactggacccgttggatagcgtc\n+ggcacttttctgtttggtgccattcactggtcactagtaggaccgctctg\n+cctattaaaataggagtgctgcttagaatttatttcagcaagacagccgt\n+aagatatatatgaatagtatttaacggcagtggcagaggcctttacaata\n+taaggctttacgggattttaaacaatgaaaaacaaaaatatttttataga\n+gtcgagataaaatactttttacaataaatactaagtatttttaaaagata\n+aaaatatttaaaagtaaatactgaatattttaacaggtgttaagtactgg\n+gaactgtgactaaggacggcggtatatcgctaccataatatttttaaagg\n+aactgaatactgaaagatgtgaccaaaatggtagaatatcgctaacgaaa\n+atcacagacgcaaatatcgatagtggcctagagacccgatatcgccgccg\n+tgattatcgatgaaactacagctgagctgcttgtttacattctaaagtta\n+aaatttttaaaaatttccaatacaaaaaaaaattcttcgtattaacacgg\n+ctgtttgccggtaagatctgaagtggattcaagttgcagcaccctcagga\n+aaagaataaattattataatataataatataatataaagaatttaaattc\n+gaaccaaacgcggtatgtcactcttcatttagagaatttgtggctttgtt\n+ttgtacaaaaaactattcttttttcttgaaaattagcatcaagaatttat\n+tgccaaaacagaacgcaattctgctaaatctggttaccctgccaaatggc\n+tggccgttctgtgcattcaataacacaagctaacggcttacggcacattc\n+ggaaagatccaaaagaatttctattccggcattttttatgtattatcgtc\n+caatttttgtatgtgtattcattcttttggctgtccgttattaagctttt\n+cttatccacataagctgaccagctgccaaaaagccgaccgttttgtgcag\n+ccaataacccaagctaactatcaacagcaatttcgaaaaatccagaagaa\n+gctatccttcggcaattttttcttatattataatttttttgtcacaattt\n+tttgtcaaaaaatcgacaataattaagtgtttacattttaatgcgattta\n+attggaaatttgaattcgaatcaaacgcggtgtcactctacatatggaca\n+atttgtggctacaaaattagttgaaagtgcatgtttttttttaagtgaaa\n+taaaacaaaaacagcaataacaggccaaaatgttgttttataaataaata\n+cttacatatccgcatataaatatgaatgaaaaaaaaaacagcaagactgg\n+gccaaaatattgttctttaatatgtaaataaatacatatctacatataca\n+tatgtggacaaaagtgttaaaattagttaatagtgcatgttttttaaatg\n+aaaaaataaaacaaaaaaagcaagcaaatataaattatattccacacaca\n+tttacaataatttcaaactacgctaaaaatacccaacagaaaaaacatag\n+aaaggtaaacaaaatacaagaatatcaaaaaaaatctaattttcacgttg\n+ttgtggccggtaccaccggtcatacatacatatagatacatatttagatt\n+ctcacaaaaagaatagaaaattaccgctaatttccttatatacgtttttg\n+ttttcagtgttttgaaatagtatttttcaaacaaaaaacaatacaaaagg\n+gttaaaaaaattgttcgattggctttaacatact
aattaaaaaaaatcac\n+aattttttggcaaaatatcgaaaaaaattaagtgtttacatttttatgcg\n+aattatttggaaatttaaataagaacccaacgcggtttgtcactctacat\n+atggacaatttgtggctttgttttgaacaaaaaatgaatttttttttact\n+caaaattaacaagaagaaacagtaatgcgggttttgatcgcaattctgct\n+aaatcgaatttccgtgcccaaaggctgacctttttgtgcagcaaatagcc\n+caagctaataatctacagcaatttcttaaagaccgaaaatgtatcaactt\n+cggcaaaatatgaaaatttaacatttatttttcaacattttttgccaaat\n+aatcgacaaaaatttcagtgtcccgatttggatgcgaattaattgaggac\n+aattgtgactttgttttgtaaaaaaactattttttttactgaaaattaca\n+atcaagaatctaaggccaaatcagaatattttcactgcggttttttgatc\n+gcaattctgctaaatcgggttccgtgccaaaaagctgactatttttttta\n+ctgagaattagcatcaagaatttattgccaaaacagaacgcaattctgct\n+aaatcgggttgccctgccaaaaggctggccgttttgcgcattcaataaca\n+caagctaacggtttacagcacattcggaaagatccaaaagaatttctatt\n+tcggcattttttatgtattatcgtctaattttggtatgtgtattcattct\n+tttggctgtccgttattaagctgcttttcttattcacatattcaatggcg\n+cagtacataaaaagaatgaaccgtggttttcttgccctctccatctccct\n+ctaccacttcccctctcaacaactatctcacgcactctgtcccttctcgt\n+nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn\n+nnnnnnnnnnnnnnnnnnnnnnnnnnnnattctctctgtcgcgctcgctc\n+tctcaacatgcctttcgcaccgtatctcttcgtttctctgtatgcttgct\n+ctcttaatttaagtttctctttctttgtgttcaatatctttcgcgctcgt\n+gactcctattgaccggactctgagcggtgctcattaagagtggagttttc\n+gatgatggcgggtggcgcgaacgagaattttttacataaaggtaagttgc\n+caattcttttttaaatgtgacatacagcatgcatattttatttattgtaa\n+ttatatgtgaaagaataaaattatatgtacttttatctaatctattgcat\n+ctattttttcacaggtaaacgagtcacacgagaagtaaaacacaaacaca\n+aggatcaaaataaaattcaggtgagtgaacgaacgtgttgaaagtagtta\n+atagtgcatgtctatttaagtgtaagaaacaaaaaagacacaaatgggcc\n+aataattcgtttctttaatagataactgcctatattatgttcaaactatg\n+ctaaaaagacccaaaaggaaaacataaagtatataccttgcaaaatgaat\n+aaaaagaaatcatatatattcatatacgtttgaccggtaccaccggtcat\n+acatacataaacataaacacaaaaagactagaatataaataccatatttt\n+tgattggcttaaaaatggcacaaaaaaaagataaaaactagagtgtctag\n+ctttaagtgacaatcgactcggaaaatgatcaagaaataaatgaaaaatt\n+tgtcttgcggctttttggtcgcaattctaaatcggtttgtatgtatgtca\n+ctcttcatgtatacaatttggtggctttgtattccacaaaagtcacagta\n+gtcacaaaagttttttttac
tgaaaattaccatcaagagtctatggccaa\n+atcagaatattttcactgcggttttttatttcaattctgctaaatcgggt\n+ttccgtgccaaaaagcttttgcagccaacaacaatattcaattttttggc\n+aaaaaatcgacaaaaaaatt\n'
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a todo.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/todo.md Wed Jul 13 13:36:37 2016 -0400
[
@@ -0,0 +1,47 @@
+# HubArchiveCreator's TODO
+
+*TODO file inspired from: http://lifehacker.com/why-a-github-gist-is-my-favorite-to-do-list-1493063613*
+
+### TO COMPLETE
+

+- [ ] Don't let the Tool Classes manage the archive (add or remove files / folders) => Everything should pass through TrackHub or another class dedicated to that
+- [ ] Move the classes and other program-related files into separate folders
+- [ ] Take into account the name of the reference genome / the change:
+  - [ ] Somebody could want to launch two visualisations of two different genomes. Repeats of Genome with extensions associated
+- [ ] Add TDD => First add the test. It should not pass. Implement. It should now pass :)
+- [ ] Replace Gff3 by an abstract class GeneralFormat, with two sub-classes GFF3Format and GTFFormat
+- [ ] TrackHub should check if the 2bit already exists instead of recreating it (which is the case atm)
+- [ ] Manage the error when a user is selecting Generic Bed instead of Bed Simple Repeats. Two options: a. Output a better error message ("Check with the other Bed options") b. Identify internally this is not a regular BED but a specific one
+- [ ] Remove the non-explicit parameters for the communication between Galaxy Wrapper and the entry point
+- [ ] Rename all occurrences of `extension` with `datatype`
+- [ ] Follow https://google.github.io/styleguide/pyguide.html
+- [ ] Move to Python 3
+- [ ] Remove the repetition of the extension if it already exists
+- [ ] Better thinking about the tool_directory management / Classes path refactoring
+- [ ] Add a debug mode to have more outputs
+- [ ] Improve the standard output of HAC
+
+### DONE
+
+
+- [x] Each time a file is added => Print it in the output with the full path (or relative path to root)
+- [x] Add a script for Linux.x86_64 to download and chmod +x the dependencies for local testing => util/install_linux_binaries.py
+- [x] Add sorting BED if not sorted (Use the output of bedToBigBed)
+- [x] Add a script to install the huba datatype
+- [x] Add the possibility to add a new item in TrackDb.txt through a public function from TrackHub.py => addTrack() in TrackHub.py
+- [x] Fix the errors for the stdio regexp not properly processed in error case (always green) => Used `detect_errors` in  galaxy wrapper and raise Exception in Python
+- [x] Add a class named ~~TrfBigProcess~~ BedSimpleRepeats
+- [x] Add a class named TrackHub: Create the base TrackHub hierarchy
+- [x] Change the Name of the classes
+- [x] Add a class named AugustusProcess: Process the Augustus output to BigBed (and others needed in TrackHub) and (create folders + add the files into the right location => Process can be ported in a class responsible for that)
+ - [x] Add a class named AugustusProcess
+ - [x] Process the Augustus output to BigBed
+ - [x] create folders + add the files into the right location
+ - [x] Creation of folders to be ported into a separate class => In Datatype.py but should be in a class dedicated to file manipulation
+ - [x] Refactoring of the AugustusProcess class to behave like a class and not like procedural code masked as a class
+ - [x] Rename AugustusProcess into something more generic if the process is shared (gtf to BigBed) => Gff3.py
+- [x] Use gffToBed for Gtf instead of GtfToGenePred => Cancelled
+- [x] Clean the mess with the File handling (sometimes File, sometimes String, sometimes open File)
+- [x] Find a way to avoid repetitions in TrackDb and Track (I repeat myself atm) => Track instance has a TrackDb instance as attribute
+- [x] Refactor the creation of the structure to TrackHub: Access to paths via this Class, and creation of file through it
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Wed Jul 13 13:36:37 2016 -0400
b
@@ -0,0 +1,94 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <!-- UCSC Tools in  -->
+    <!-- Useful for HAC are:
+        - twoBitInfo
+        - sort
+        - bedToBigBed
+        - gff3ToGenePred
+        - gtfToGenePred
+        - genePredToBed
+        - faToTwoBit
+        - samtools
+    -->
+    <package name="ucsc_tools" version="312">
+        <repository changeset_revision="2d6bafd63401" name="package_ucsc_tools_312" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+
+    <package name="samtools" version="1.2">
+        <repository changeset_revision="f6ae3ba3f3c1" name="package_samtools_1_2" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+
+    <!-- TODO: Tools to install -->
+    <!-- twoBitInfo / bedToBigBed / faToTwoBit -->
+
+
+    <!-- Package gff3ToGenePred -->
+    <package name="gff3ToGenePred" version="0.0.1">
+        <install version="1.0">
+            <actions>
+                <action target_directory="bin" type="download_binary">
+                    <url_template os="darwin">
+                        http://hgdownload.cse.ucsc.edu/admin/exe/macOSX.${architecture}/gff3ToGenePred
+                    </url_template>
+                    <url_template os="linux">
+                        http://hgdownload.cse.ucsc.edu/admin/exe/linux.${architecture}/gff3ToGenePred
+                    </url_template>
+                </action>
+                <action type="chmod">
+                    <file mode="750">$INSTALL_DIR/bin/gff3ToGenePred</file>
+                </action>
+                <action type="set_environment">
+                    <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable>
+                </action>
+            </actions>
+        </install>
+        <readme>The tools downloaded by this dependency definition are free for academic use.</readme>
+    </package>
+
+    <!-- Package gtfToGenePred -->
+    <package name="gtfToGenePred" version="0.0.1">
+        <install version="1.0">
+            <actions>
+                <action target_directory="bin" type="download_binary">
+                    <url_template os="darwin">
+                        http://hgdownload.cse.ucsc.edu/admin/exe/macOSX.${architecture}/gtfToGenePred
+                    </url_template>
+                    <url_template os="linux">
+                        http://hgdownload.cse.ucsc.edu/admin/exe/linux.${architecture}/gtfToGenePred
+                    </url_template>
+                </action>
+                <action type="chmod">
+                    <file mode="750">$INSTALL_DIR/bin/gtfToGenePred</file>
+                </action>
+                <action type="set_environment">
+                    <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable>
+                </action>
+            </actions>
+        </install>
+        <readme>The tools downloaded by this dependency definition are free for academic use.</readme>
+    </package>
+
+    <!-- Package genePredToBed -->
+    <package name="genePredToBed" version="0.0.1">
+        <install version="1.0">
+            <actions>
+                <action target_directory="bin" type="download_binary">
+                    <url_template os="darwin">
+                        http://hgdownload.cse.ucsc.edu/admin/exe/macOSX.${architecture}/genePredToBed
+                    </url_template>
+                    <url_template os="linux">
+                        http://hgdownload.cse.ucsc.edu/admin/exe/linux.${architecture}/genePredToBed
+                    </url_template>
+                </action>
+                <action type="chmod">
+                    <file mode="750">$INSTALL_DIR/bin/genePredToBed</file>
+                </action>
+                <action type="set_environment">
+                    <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable>
+                </action>
+            </actions>
+        </install>
+        <readme>The tools downloaded by this dependency definition are free for academic use.</readme>
+    </package>
+</tool_dependency>
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a trackHub/README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/trackHub/README.md Wed Jul 13 13:36:37 2016 -0400
[
@@ -0,0 +1,12 @@
+This folder exists to configure Galaxy to handle the files generated by the HubArchiveCreator tool
+
+* The **content** of datatypes_conf.xml should go inside config/datatypes_conf.xml, under \<registration\> (Copy config/datatypes_conf.xml.sample to config/datatypes_conf.xml if it does not exist yet)
+* The content of tracks_partial.py should go inside `lib/galaxy/datatypes/tracks.py`
+* trackhub.xml should go inside display_applications/ucsc/
+
+
+TODO:
+
+- [x] Create a script to copy all these files directly into galaxy => Done, but not usable now that I have changed the datatype to match Galaxy IUC
+- [ ] Need to modify the script `util/add_datatype.py` to match the changes introduced in https://github.com/galaxyproject/galaxy/pull/2348
+- [x] Create a package in ToolShed that installs this directly when installing HubArchiveCreator => Not recommended by Galaxy IUC and team
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a trackHub/datatypes_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/trackHub/datatypes_conf.xml Wed Jul 13 13:36:37 2016 -0400
b
@@ -0,0 +1,3 @@
+<datatype extension="trackhub" type="galaxy.datatypes.tracks:UCSCTrackHub" display_in_upload="true">
+    <display file="ucsc/trackhub.xml" />
+</datatype>
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a trackHub/trackhub.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/trackHub/trackhub.xml Wed Jul 13 13:36:37 2016 -0400
b
@@ -0,0 +1,6 @@
+<display id="ucsc_trackhub" version="1.0.0" name="display at Track Hub UCSC">
+    <link id="main" name="main">
+        <url>https://genome.ucsc.edu/cgi-bin/hgHubConnect?hubUrl=${qp($hub_file.url + '/myHub/hub.txt')}&amp;hgHub_do_firstDb=on&amp;hgHub_do_redirect=on&amp;hgHubConnect.remakeTrackHub=on</url>
+        <param type="data" name="hub_file" url="galaxy_${DATASET_HASH}" allow_extra_files_access="True" />
+    </link>
+</display>
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a trackHub/tracks_partial.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/trackHub/tracks_partial.py Wed Jul 13 13:36:37 2016 -0400
[
@@ -0,0 +1,56 @@
+"""
+UCSCTrackHub datatype (UCSC Track Hub composite dataset)
+"""
+import logging
+
+from galaxy.datatypes.text import Html
+
+log = logging.getLogger( __name__ )
+
+# !!! README !!! The content of this file should be added in tracks.py, but do it carefully!
+# Don't erase the existing content
+
+
+class UCSCTrackHub( Html ):
+    """
+    derived class for BioC data structures in Galaxy
+    """
+
+    file_ext = 'trackhub'
+    composite_type = 'auto_primary_file'
+
+    def __init__( self, **kwd ):
+        Html.__init__( self, **kwd )
+
+    def generate_primary_file( self, dataset=None ):
+        """
+        This is called only at upload to write the html file
+        cannot rename the datasets here - they come with the default unfortunately
+        """
+        rval = [
+            '<html><head><title>Files for Composite Dataset (%s)</title></head><p/>\
+            This composite dataset is composed of the following files:<p/><ul>' % (
+                self.file_ext)]
+        for composite_name, composite_file in self.get_composite_files(dataset=dataset).iteritems():
+            opt_text = ''
+            if composite_file.optional:
+                opt_text = ' (optional)'
+            rval.append('<li><a href="%s">%s</a>%s' % (composite_name, composite_name, opt_text))
+        rval.append('</ul></html>')
+        return "\n".join(rval)
+
+    def set_peek( self, dataset, is_multi_byte=False ):
+        if not dataset.dataset.purged:
+            dataset.peek = "Track Hub structure: Visualization in UCSC Track Hub"
+        else:
+            dataset.peek = 'file does not exist'
+            dataset.blurb = 'file purged from disk'
+
+    def display_peek( self, dataset ):
+        try:
+            return dataset.peek
+        except:
+            return "Track Hub structure: Visualization in UCSC Track Hub"
+
+    def sniff( self, filename ):
+        return False
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a trf_simpleRepeat.as
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/trf_simpleRepeat.as Wed Jul 13 13:36:37 2016 -0400
b
@@ -0,0 +1,20 @@
+table simpleRepeat
+"Describes the Simple Tandem Repeats"
+   (
+   string chrom;       "Reference sequence chromosome or scaffold"
+   uint   chromStart;  "Start position in chromosome"
+   uint   chromEnd;    "End position in chromosome"
+   string name;        "Simple Repeats tag name"
+   uint   period;      "Length of repeat unit"
+   float  copyNum;     "Mean number of copies of repeat"
+   uint   consensusSize; "Length of consensus sequence"
+   uint   perMatch;   "Percentage Match"
+   uint   perIndel;   "Percentage Indel"
+   uint   score;  "Alignment Score = 2*match-7*mismatch-7*indel; minscore=50"
+   uint   A;   "Percent of A's in repeat unit"
+   uint   C;   "Percent of C's in repeat unit"
+   uint   G;   "Percent of G's in repeat unit"
+   uint   T;   "Percent of T's in repeat unit"
+   float   entropy;   "Entropy"
+   lstring sequence;     "Sequence of repeat unit element"
+   )
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a util/__init__.pyc
b
Binary file util/__init__.pyc has changed
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a util/add_datatype.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/util/add_datatype.py Wed Jul 13 13:36:37 2016 -0400
[
@@ -0,0 +1,74 @@
+#!/usr/bin/python
+
+"""
+This script copies the huba datatype into your Galaxy instance:
+    - Add under <registration>, the datatype_conf
+    - Add huba.xml under display_application/ucsc/
+    - Add hubAssembly.py inside lib/galaxy/datatypes
+Place yourself in the folder of the python script, and launch it
+- Based on the fact datatypes_conf
+"""
+
+import argparse
+import os
+import shutil
+import sys
+import xml.etree.ElementTree as ET
+
+
+def main(argv):
+    # Command Line parsing init
+    parser = argparse.ArgumentParser(description='Create a foo.txt inside the given folder.')
+
+    parser.add_argument('-g', '--galaxy_root', help='Galaxy root folder', required=True)
+
+    # Get the args passed in parameter
+    args = parser.parse_args()
+
+    galaxy_root_path = args.galaxy_root
+
+    add_datatype_conf(galaxy_root_path)
+    add_huba_xml(galaxy_root_path)
+    add_hubAssembly(galaxy_root_path)
+
+
+def add_datatype_conf(galaxy_root_path):
+    print "======= Add datatype ======="
+    datatype_conf_path = os.path.join(galaxy_root_path, 'config/datatypes_conf.xml')
+    # TODO: Not relative to this python file but based on a parameter galaxy_root
+    # TODO: Check if datatypes_conf.xml, if not create it by copying datatypes_conf.xml.sample
+    # TODO: For debug only
+    # tree = ET.parse('../test-data/add_datatype/datatypes_conf.xml.sample')
+    # TODO: UnComment for prod
+    tree = ET.parse(datatype_conf_path)
+    root = tree.getroot()
+    print root.tag
+    registration = root[0]
+    print registration.attrib
+
+    huba_datatype = ET.parse('../trackHub/datatypes_conf.xml').getroot()
+    # TODO: Verify the datatype is not already existing, else do not add / write. And in another version, check it
+    registration.append(huba_datatype)
+    tree.write(datatype_conf_path)
+    print "datatype added in %s" % datatype_conf_path
+    return
+
+
+def add_huba_xml(galaxy_root_path):
+    print "======= Add hub xml ======="
+    displayApp_ucsc_path = os.path.join(galaxy_root_path, "display_applications/ucsc/")
+    shutil.copy("../trackHub/huba.xml", displayApp_ucsc_path)
+    print "Content of %s now: %s" % (displayApp_ucsc_path, os.listdir(displayApp_ucsc_path))
+    return
+
+
+def add_hubAssembly(galaxy_root_path):
+    print "======= Add hubAssembly ======="
+    datatype_lib_path = os.path.join(galaxy_root_path, "lib/galaxy/datatypes/")
+    shutil.copy("../trackHub/hubAssembly.py", datatype_lib_path)
+    print "Content of %s now: %s" % (datatype_lib_path, os.listdir(datatype_lib_path))
+    return
+
+
+if __name__ == "__main__":
+    main(sys.argv)
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a util/cleanDirectory.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/util/cleanDirectory.py Wed Jul 13 13:36:37 2016 -0400
b
@@ -0,0 +1,14 @@
+#!/usr/bin/python
+"""Use to clean the directory after the run of HubArchiveCreator.py manually"""
+import os
+import shutil
+
+# Remove 'myHub.zip at root folder
+try:
+    os.remove('myHub.zip')
+except OSError as o:
+    # We don't need to crash the program
+    print 'Warning: ' + str(o)
+
+# Remove 'myHub' folder and its content
+shutil.rmtree('myHub', ignore_errors=True)
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a util/install_linux_binaries
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/util/install_linux_binaries Wed Jul 13 13:36:37 2016 -0400
[
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+
+localBinPath="/usr/local/bin/"
+hgDownloadURl="http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/"
+
+# Binaries needed for HAC
+twoBitInfo="twoBitInfo"
+faToTwoBit="faToTwoBit"
+gff3ToGenePred="gff3ToGenePred"
+gtfToGenePred="gtfToGenePred"
+genePredToBed="genePredToBed"
+# sort="sort"
+bedToBigBed="bedToBigBed"
+
+binariesArray=( ${twoBitInfo} ${faToTwoBit} ${gff3ToGenePred} ${gtfToGenePred} ${genePredToBed} ${bedToBigBed} )
+
+# Download and install binaries
+for binaryName in "${binariesArray[@]}"
+do
+    binaryInstallationPath="${localBinPath}/${binaryName}"
+    sudo wget -P ${localBinPath} "${hgDownloadURl}/${binaryName}"
+    sudo chown ${USER} ${binaryInstallationPath}
+    sudo chmod +x ${binaryInstallationPath}
+done
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a util/subtools.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/util/subtools.py Wed Jul 13 13:36:37 2016 -0400
[
@@ -0,0 +1,159 @@
+#!/usr/bin/python
+# -*- coding: utf8 -*-
+
+"""
+This module handles the subprocess calls of the different tools used
+in HubArchiveCreator
+"""
+
+import os
+import subprocess
+
+
+def _handleExceptionAndCheckCall(array_call, **kwargs):
+    """
+    This class handle exceptions and call the tool.
+    It maps the signature of subprocess.check_call:
+    See https://docs.python.org/2/library/subprocess.html#subprocess.check_call
+    """
+    stdin = kwargs.get('stdin')
+    stdout = kwargs.get('stdout')
+    stderr = kwargs.get('stderr')
+    shell = kwargs.get('shell')
+    try:
+        p = subprocess.check_call(array_call, stdin=stdin, stdout=stdout, stderr=stderr, shell=shell)
+    except subprocess.CalledProcessError:
+        raise
+    return p
+
+
+def twoBitInfo(two_bit_file_name, two_bit_info_file):
+    """
+    Call twoBitInfo and write the result into twoBit_info_file
+    :param two_bit_file_name:
+    :param two_bit_info_file:
+    :return the subprocess.check_call return object:
+    """
+    array_call = ['twoBitInfo', two_bit_file_name, two_bit_info_file]
+    p = _handleExceptionAndCheckCall(array_call)
+    return p
+
+
+def faToTwoBit(fasta_file_name, mySpecieFolder):
+    """
+    This function call faToTwoBit UCSC tool, and return the twoBitFile
+    :param fasta_file_name:
+    :param mySpecieFolder:
+    :return:
+    """
+    baseNameFasta = os.path.basename(fasta_file_name)
+    suffixTwoBit, extensionTwoBit = os.path.splitext(baseNameFasta)
+    nameTwoBit = suffixTwoBit + '.2bit'
+
+    with open(os.path.join(mySpecieFolder, nameTwoBit), 'w') as twoBitFile:
+        array_call = ['faToTwoBit', fasta_file_name, twoBitFile.name]
+        _handleExceptionAndCheckCall(array_call)
+
+    return twoBitFile
+
+
+def gtfToGenePred(input_gtf_file_name, gene_pred_file_name):
+    """
+    Call gtfToGenePred and write the result into gene_pred_file_name
+    :param input_gtf_file_name:
+    :param gene_pred_file_name:
+    :return:
+    """
+    array_call = ['gtfToGenePred', input_gtf_file_name, gene_pred_file_name]
+    p = _handleExceptionAndCheckCall(array_call)
+    return p
+
+
+def gff3ToGenePred(input_gff3_file_name, gene_pred_file_name):
+    """
+    Call gff3ToGenePred and write the result into gene_pred_file_name
+    :param input_gff3_file_name:
+    :param gene_pred_file_name:
+    :return:
+    """
+    array_call = ['gff3ToGenePred', input_gff3_file_name, gene_pred_file_name]
+    p = _handleExceptionAndCheckCall(array_call)
+    return p
+
+
+def genePredToBed(gene_pred_file_name, unsorted_bed_file_name):
+    """
+    Call genePredToBed and write the result into unsorted_bed_file_name
+    :param gene_pred_file_name:
+    :param unsorted_bed_file_name:
+    :return:
+    """
+    array_call = ['genePredToBed', gene_pred_file_name, unsorted_bed_file_name]
+    p = _handleExceptionAndCheckCall(array_call)
+    return p
+
+
+def sort(unsorted_bed_file_name, sorted_bed_file_name):
+    """
+    Call sort with -k1,1 -k2,2n on unsorted_bed_file_name and write the result into sorted_bed_file_name
+    :param unsorted_bed_file_name:
+    :param sorted_bed_file_name:
+    :return:
+    """
+    array_call = ['sort', '-k', '1,1', '-k', '2,2n', unsorted_bed_file_name, '-o', sorted_bed_file_name]
+    p = _handleExceptionAndCheckCall(array_call)
+    return p
+
+
+def sortChromSizes(two_bit_info_file_name, chrom_sizes_file_name):
+    """
+    Call sort with -k2rn on two_bit_info_file_name and write the result into chrom_sizes_file_name
+    :param two_bit_info_file_name:
+    :param chrom_sizes_file_name:
+    :return:
+    """
+    array_call = ['sort', '-k2rn', two_bit_info_file_name, '-o', chrom_sizes_file_name]
+    p = _handleExceptionAndCheckCall(array_call)
+    return p
+
+
+def bedToBigBed(sorted_bed_file_name, chrom_sizes_file_name, big_bed_file_name, typeOption=None, autoSql=None):
+    """
+    Call bedToBigBed on sorted_bed_file_name, using chrom_sizes_file_name and write the result into big_bed_file_name
+    :param sorted_bed_file_name:
+    :param chrom_sizes_file_name:
+    :param big_bed_file_name:
+    :return:
+    """
+    array_call = ['bedToBigBed', sorted_bed_file_name, chrom_sizes_file_name, big_bed_file_name]
+    if typeOption:
+        array_call.append(typeOption)
+    if autoSql:
+        array_call.append(autoSql)
+
+    p = _handleExceptionAndCheckCall(array_call)
+    return p
+
+
+def sortBam(input_bam_file_name, output_sorted_bam_name):
+    """
+    Call samtools on input_bam_file_name and output the result in output_sorted_bam_name
+    :param input_bam_file_name:
+    :param output_sorted_bam_name:
+    :return:
+    """
+    array_call = ['samtools', 'sort', input_bam_file_name, '-o', output_sorted_bam_name]
+    p = _handleExceptionAndCheckCall(array_call)
+    return p
+
+
+def createBamIndex(input_sorted_bam_file_name, output_name_index_name):
+    """
+    Call `samtools index` on imput_sorted_bam_file_name and output the result in output_name_index_name
+    :param input_sorted_bam_file_name:
+    :param output_name_index_name:
+    :return:
+    """
+    array_call = ['samtools', 'index', input_sorted_bam_file_name, output_name_index_name]
+    p = _handleExceptionAndCheckCall(array_call)
+    return p
b
diff -r 0f3bc17e5ede -r fb5e60d4d18a util/subtools.pyc
b
Binary file util/subtools.pyc has changed