Repository 'msp_sr_bowtie_cascade'
hg clone https://toolshed.g2.bx.psu.edu/repos/drosofff/msp_sr_bowtie_cascade

Changeset 0:0528fced93a9 (2015-05-27)
Commit message:
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
added:
sRbowtieCascade.py
sRbowtieCascade.xml
test-data/Cascade_Annotation_Analysis.tab
test-data/Ensembl_transposon_set.fa
test-data/dme_miR21_hairpin.fa
test-data/dmel-2L-r6.04.fasta
test-data/sample1.fa
test-data/sample2.fa
test-data/sample3.fa
tool-data/bowtie_indices.loc.sample
tool_data_table_conf.xml.sample
tool_dependencies.xml
b
diff -r 000000000000 -r 0528fced93a9 sRbowtieCascade.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sRbowtieCascade.py Wed May 27 17:31:35 2015 -0400
[
@@ -0,0 +1,141 @@
+#!/usr/bin/env python
+# small RNA oriented bowtie wrapper in cascade for small RNA data set genome annotation
+# version 0.9 13-6-2014
+# Usage sRbowtie_cascade.py see Parser() for valid arguments
+# Christophe Antoniewski <drosofff@gmail.com>
+
+import sys, os, subprocess, tempfile, shutil, argparse
+from collections import defaultdict
+
+def Parser():
+  """Parse the command line of the cascade wrapper.
+
+  Returns the argparse namespace holding the single-valued string options
+  output, num_threads and mismatch, and the list-valued options
+  indexing_flags, index, indexName, input and label.
+  """
+  cli = argparse.ArgumentParser()
+  # options that carry exactly one string value
+  single_valued = [
+    ('--output', {'help': "output file"}),
+    ('--num-threads', {'dest': "num_threads", 'help': "number of bowtie threads"}),
+    ('--mismatch', {'help': "number of mismatches allowed"}),
+  ]
+  for flag, extra in single_valued:
+    cli.add_argument(flag, action="store", type=str, **extra)
+  # options that carry one or more values
+  multi_valued = [
+    ('--indexing-flags', {'dest': "indexing_flags", 'help': "whether the index should be generated or not by bowtie-buid"}),
+    ('--index', {'help': "paths to indexed or fasta references"}),
+    ('--indexName', {'help': "Names of the indexes"}),
+    ('--input', {'help': "paths to multiple input files"}),
+    ('--label', {'help': "labels of multiple input files"}),
+  ]
+  for flag, extra in multi_valued:
+    cli.add_argument(flag, nargs='+', **extra)
+  return cli.parse_args()

+def stop_err( msg ):
+  """Write msg and a newline to stderr, then terminate with exit status 1.
+
+  Raises SystemExit; it never returns.
+  """
+  sys.stderr.write( '%s\n' % msg )
+  # a bare sys.exit() would report status 0 (success) although this is an error path
+  sys.exit( 1 )
+
+def bowtie_squash(fasta):
+  """Build a temporary bowtie index from a fasta file.
+
+  A temporary directory is created, the fasta file is symlinked into it under
+  a unique name, and bowtie-build is run with that symlink as both the
+  reference and the index basename (stdout and stderr are discarded).
+
+  Returns the index path in bowtie fashion (no extension). The directory that
+  contains it is NOT removed here: it has to be cleaned by the caller once
+  the alignments are done.
+
+  If the symlink or bowtie-build fails, the temporary directory is removed
+  and stop_err() is called with the reason.
+  """
+  tmp_index_dir = tempfile.mkdtemp() # make temp directory for bowtie indexes
+  # reserve a unique name in that directory: closing deletes the file, the name is kept
+  ref_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir )
+  ref_file_name = ref_file.name
+  ref_file.close()
+  cmd1 = 'bowtie-build -f %s %s' % (ref_file_name, ref_file_name ) # run with cwd=tmp_index_dir
+  try:
+    os.symlink( fasta, ref_file_name ) # symlink between the fasta source file and the reserved name
+    with open(os.devnull, 'w') as FNULL:
+      proc = subprocess.Popen( args=cmd1, shell=True, cwd=tmp_index_dir, stderr=FNULL, stdout=FNULL )
+      returncode = proc.wait()
+    if returncode != 0:
+      # without this check a failed build would return a path to a non-existent index
+      raise RuntimeError( 'bowtie-build exited with status %s' % returncode )
+    sys.stdout.write(cmd1 + "\n")
+  except Exception as e:
+    # clean up temp dir, then report; stop_err() exits the program
+    shutil.rmtree( tmp_index_dir, ignore_errors=True )
+    stop_err( 'Error indexing reference sequence\n' + str( e ) )
+  # ref_file_name is already an absolute path inside tmp_index_dir
+  return ref_file_name
+  
+def make_working_dir():
+  """Create a new temporary directory and return its path."""
+  return tempfile.mkdtemp()
+  
+def Clean_TempDir(directory):
+  """Recursively delete directory when it exists; otherwise do nothing. Returns None."""
+  if not os.path.exists( directory ):
+    return
+  shutil.rmtree( directory )
+
+def bowtie_alignment(command_line="None", working_dir = ""):
+  """Run one bowtie command line and count the reads it aligned.
+
+  command_line is executed through the shell with working_dir as current
+  directory; the stdout and stderr of the command are discarded and its exit
+  status is not checked. The command line and the number of lines found in
+  working_dir/al.fasta are echoed on stdout.
+
+  Returns the number of lines of working_dir/al.fasta divided by two
+  (presumably one header line and one sequence line per read), as an
+  integer, or 0 when that file cannot be opened.
+  """
+  with open(os.devnull, 'w') as FNULL:
+    p = subprocess.Popen(args=command_line, cwd=working_dir, shell=True, stderr=FNULL, stdout=FNULL)
+    p.wait()
+  sys.stdout.write("%s\n" % command_line)
+  aligned = 0
+  try: # hacked at gcc2014 in case of no alignment, no al.fasta file generated (?)
+    with open("%s/al.fasta" % working_dir, "r") as F:
+      aligned = sum(1 for line in F)
+  except IOError:
+    pass # a missing al.fasta is counted as zero aligned reads
+  sys.stdout.write("Aligned: %s\n" % aligned)
+  # floor division keeps the count an integer whatever the Python version
+  return aligned // 2
+
+def CommandLiner (v_mis="1", pslots="12", index="dum/my", input="dum/my", working_dir=""):
+  """Return the bowtie command line for one alignment step.
+
+  v_mis is passed to -v, pslots to -p, index is the bowtie index and input the
+  fasta file given to -f. Aligned and unaligned reads are directed to
+  working_dir/al.fasta and working_dir/unal.fasta; output columns 1 to 8 are
+  suppressed.
+  """
+  pieces = [
+    "bowtie",
+    "-v %s" % (v_mis,),
+    "-k 1 --best",
+    "-p %s" % (pslots,),
+    "--al %s/al.fasta" % (working_dir,),
+    "--un %s/unal.fasta" % (working_dir,),
+    "--suppress 1,2,3,4,5,6,7,8",
+    "%s" % (index,),
+    "-f %s" % (input,),
+  ]
+  return " ".join(pieces)
+
+def _count_fasta_reads(path):
+  """Return the number of lines of path divided by two, or 0 when path cannot be opened."""
+  try:
+    with open(path, "r") as handle:
+      return sum(1 for line in handle) // 2
+  except IOError:
+    return 0
+
+def _remove_if_present(path):
+  """Delete the file at path when it exists."""
+  if os.path.exists(path):
+    os.unlink(path)
+
+def _annotate_sample(sample_path, index_paths, mismatch, num_threads):
+  """Run the alignment cascade for one sample in a private working directory.
+
+  The sample is aligned on index_paths[0]. The reads aligned at that first
+  step are the input of the second step; from then on the reads left
+  unaligned by a step are the input of the following one.
+
+  Returns a list holding one aligned-read count per index, followed by the
+  number of reads found in unal.fasta after the last step. A step whose input
+  file does not exist is not run and counts 0. The working directory is
+  removed even when a step raises.
+  """
+  workingDir = make_working_dir()
+  al_path = "%s/al.fasta" % workingDir
+  unal_path = "%s/unal.fasta" % workingDir
+  to_align = "%s/toAlign.fasta" % workingDir
+  try:
+    counts = []
+    cmd = CommandLiner (v_mis=mismatch, pslots=num_threads, index=index_paths[0], input=sample_path, working_dir=workingDir)
+    counts.append( bowtie_alignment(command_line=cmd, working_dir = workingDir) ) # first step of the cascade
+    source = al_path # the aligned reads of the first step are the input of the second step
+    for index_path in index_paths[1:]:
+      has_reads = os.path.exists(source)
+      if has_reads:
+        os.rename(source, to_align)
+      # remove the outputs of the previous step: bowtie apparently does not
+      # (re)create al.fasta / unal.fasta when it has nothing to write in them
+      # (see the gcc2014 hack in bowtie_alignment), so leftovers would be
+      # counted or re-aligned by mistake
+      _remove_if_present(al_path)
+      _remove_if_present(unal_path)
+      if has_reads:
+        cmd = CommandLiner (v_mis=mismatch, pslots=num_threads, index=index_path, input=to_align, working_dir=workingDir)
+        counts.append( bowtie_alignment(command_line=cmd, working_dir = workingDir) )
+      else:
+        counts.append(0) # nothing left to align at this step
+      source = unal_path # from now on, unaligned reads feed the next step
+    counts.append( _count_fasta_reads(unal_path) ) # to finish, the number of unmatched reads
+    return counts
+  finally:
+    Clean_TempDir (workingDir) # clean the sample working directory
+
+def __main__():
+  """Annotate every input sample through the bowtie cascade and write the summary table.
+
+  Indexes flagged "history" are built from fasta with bowtie_squash() and
+  their temporary directories are removed at the end, even on failure; other
+  indexes are used as given. The output file is tab separated: a header line
+  with the sample labels, one line per index name with the aligned-read
+  counts of each sample, and a final "Remaining Unmatched" line.
+  """
+  args = Parser()
+  ## first we make all indexes available. They can be already available or be squashed by bowtie-build
+  indexes = [] # (index path, True when the index is temporary and must be deleted)
+  results = [] # one list of counts per sample, in the order of args.label
+  try:
+    for indexing_flags, bowtiePath in zip (args.indexing_flags, args.index):
+      if indexing_flags == "history":
+        indexes.append( (bowtie_squash (bowtiePath), True) )
+      else:
+        indexes.append( (bowtiePath, False) )
+    index_paths = [path for path, temporary in indexes]
+    # results are kept per position, not per label, so that two samples
+    # sharing the same label do not get their counts mixed
+    for sample_path in args.input: ## the main cascade, iterating over samples and bowtie indexes
+      results.append( _annotate_sample(sample_path, index_paths, args.mismatch, args.num_threads) )
+  finally:
+    ## cleaning of the temporary indexes (the directory that contains the index files)
+    for path, temporary in indexes:
+      if temporary:
+        Clean_TempDir (os.path.dirname(path))
+
+  with open (args.output, "w") as F:
+    F.write ("alignment reference\t%s\n" % "\t".join(args.label))
+    for i, reference in enumerate(args.indexName):
+      F.write ("%s" % reference)
+      for counts in results:
+        F.write ("\t%s" % "{:,}".format(counts[i]) )
+      F.write ("\n")
+    F.write ("Remaining Unmatched")
+    for counts in results:
+      F.write ("\t%s" % "{:,}".format(counts[-1]) )
+    F.write ("\n")
+
+if __name__=="__main__": __main__()
b
diff -r 000000000000 -r 0528fced93a9 sRbowtieCascade.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sRbowtieCascade.xml Wed May 27 17:31:35 2015 -0400
[
@@ -0,0 +1,151 @@
+<tool id="sRbowtie_cascade" name="Annotate smRNA datasets" version="1.0.1">
+  <description>Using iterative sRbowtie Alignments</description>
+  <requirements>
+        <requirement type="package" version="0.12.7">bowtie</requirement>
+  </requirements>
+  <command interpreter="python"> sRbowtieCascade.py --output $output
+                                                    --num-threads \${GALAXY_SLOTS:-4} ## number of processors to be handled by bowtie
+                                                    --mismatch $mismatches
+                                                    --input
+     #for $i in $input:
+       $i
+     #end for
+                                                   --label
+                                                    #for $i in $input:
+                                                      "$i.name"
+                                                    #end for
+                                                   --index
+                                                    #if $refGenomeSource1.genomeSource == "history":
+                                                      $refGenomeSource1.ownFile
+                                                    #else:
+                                                      $refGenomeSource1.index.fields.path
+                                                    #end if
+                                                    #for $i in $AdditionalQueries:
+       #if $i.refGenomeSource.genomeSource == "history":
+         $i.refGenomeSource.ownFile
+                                                      #else:
+                                                        $i.refGenomeSource.index.fields.path
+                                                      #end if
+                                                    #end for
+                                                   --indexing-flags
+     $refGenomeSource1.genomeSource
+                                                    #for $i in $AdditionalQueries:
+       $i.refGenomeSource.genomeSource
+                                                    #end for
+                                                   --indexName
+                                                    #if $refGenomeSource1.genomeSource == "history":
+                                                      "$refGenomeSource1.ownFile.name"
+                                                    #else:
+                                                      "$refGenomeSource1.index.fields.name"
+                                                    #end if
+                                                    #for $i in $AdditionalQueries:
+                                                      #if $i.refGenomeSource.genomeSource == "history":
+                                                        "$i.refGenomeSource.ownFile.name"
+                                                      #else:
+                                                        "$i.refGenomeSource.index.fields.name"
+                                                      #end if
+                                                    #end for
+  </command>
+  <inputs>
+      <param name="input" type="data" format="fasta" label="Input fasta file: reads clipped from their adapter" help="Only with clipped, raw fasta files" multiple="true"/>
+    <param name="mismatches" type="select" label="Number of mismatches allowed" help="specify the number of mismatches allowed during alignments">
+        <option value="0">0</option>
+        <option value="1" selected="true">1</option>
+        <option value="2">2</option>
+        <option value="3">3</option>
+    </param>
+<!-- First bowtie index selection -->
+    <conditional name="refGenomeSource1">
+      <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
+        <option value="indexed">Use a built-in index</option>
+        <option value="history">Use one from the history</option>
+      </param>
+      <when value="indexed">
+        <param name="index" type="select" label="Select a DNA reference index" help="if your genome of interest is not listed - contact instance administrator">
+          <options from_data_table="bowtie_indexes"/>
+        </param>
+      </when>
+      <when value="history">
+        <param name="ownFile" type="data" format="fasta" label="Select a fasta file, to serve as index reference" />
+      </when>
+    </conditional>
+<!-- End of first bowtie index selection -->
+<!-- other  bowtie index selections -->
+    <repeat name="AdditionalQueries" title="Additional Alignment Step">
+ <conditional name="refGenomeSource">
+       <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
+ <option value="indexed">Use a built-in index</option>
+ <option value="history">Use one from the history</option>
+ </param>
+ <when value="indexed">
+ <param name="index" type="select" label="Select a DNA reference index" help="if your genome of interest is not listed - contact instance administrator">
+ <options from_data_table="bowtie_indexes"/>
+ </param>
+ </when>
+ <when value="history">
+ <param name="ownFile" type="data" format="fasta" label="Select a fasta file, to serve as index reference" />
+ </when>
+        </conditional>
+    </repeat>
+<!-- End of other bowtie index selections -->
+   </inputs>
+   <outputs>
+   <data format="tabular" name="output" label="Cascade Annotation Analysis"/>
+   </outputs>
+
+
+    <tests>
+        <test>
+      <param name="input" value ="sample1.fa,sample2.fa,sample3.fa" ftype="fasta" />
+      <param name="genomeSource" value="history" />
+      <param name="ownFile" value ="dmel-2L-r6.04.fasta" ftype="fasta" />
+      <param name="AdditionalQueries_0|refGenomeSource|genomeSource" value="history"/>
+      <param name="AdditionalQueries_0|refGenomeSource|ownFile" value="dme_miR21_hairpin.fa" ftype="fasta" />
+      <param name="AdditionalQueries_1|refGenomeSource|genomeSource" value="history"/>
+      <param name="AdditionalQueries_1|refGenomeSource|ownFile" value="Ensembl_transposon_set.fa" ftype="fasta" />
+      <output name="output" ftype="tabular" file="Cascade_Annotation_Analysis.tab" />
+        </test>
+    </tests>
+  <help>
+
+**Intro**
+
+Bowtie_ is a short read aligner designed to be ultrafast and memory-efficient.
+A generic "Map with Bowtie for Illumina" Galaxy tool is available in the main Galaxy distribution.
+However, this Bowtie wrapper tool only takes FASTQ files as inputs.
+
+Here, the sRbowtie wrapper works specifically with short-read FASTA inputs (-v bowtie mode, with -k 1)
+
+.. _Bowtie: http://bowtie-bio.sourceforge.net/index.shtml
+
+
+------
+
+**What it does**
+
+.. class:: infomark
+
+This script uses the sRbowtie wrapper to iteratively match reads on a series of reference indexes.
+
+Reads are Matched on DNA references as fast as possible, without taking care of mapping issues
+
+*-v [0,1,2,3] -k 1 --best -p [GALAXY_SLOTS, 4 by default] --suppress 1,2,3,4,5,6,7,8*
+
+Reads aligned at step 1 are used as input for step 2; from step 2 onwards, unaligned reads at step N are used as input for sRbowtie at step N+1
+
+-----
+
+**Input formats**
+
+.. class:: warningmark
+
+*The only accepted format for the script is a raw fasta list of reads, clipped from their adapter*
+
+-----
+
+**OUTPUTS**
+
+**Annotation table**
+
+  </help>
+</tool>
b
diff -r 000000000000 -r 0528fced93a9 test-data/Cascade_Annotation_Analysis.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Cascade_Annotation_Analysis.tab Wed May 27 17:31:35 2015 -0400
b
@@ -0,0 +1,5 @@
+alignment reference sample1.fa sample2.fa sample3.fa
+dmel-2L-r6.04.fasta 9,588 8,313 9,534
+dme_miR21_hairpin.fa 1,063 1,365 2,893
+Ensembl_transposon_set.fa 6,148 5,114 4,795
+Remaining Unmatched 2,377 1,834 1,846
b
diff -r 000000000000 -r 0528fced93a9 test-data/Ensembl_transposon_set.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Ensembl_transposon_set.fa Wed May 27 17:31:35 2015 -0400
b
b'@@ -0,0 +1,10060 @@\n+>FBgn0026065_Idefix\n+GTGACATATCCATAAGTCCCTAAGACTTAAGCATATGCCTACATACTAATACACTTACAA\n+CACATACACCCCAATACAACATACACTACTCCGGATGTACCCAACAGATACCAGATAAGA\n+ATAAGATTGTTATATGATCCTCGAGAATGGAAAAAACCCCAATTCTAGATAAGTCACCCA\n+CTGGTAGACTAAACATCCGTCCCCTAATTTAAACAATTCCTTGCTTAAGCCTCACCCCAT\n+CGTCACATTCCCACGTTCAAAGCTCGGAGCCGCAATCCCGAAAAACAAAAGTATCGATTT\n+CAATAAACAAATTATAAGAATCTAAGAGCACTTGTATCCAAGAGCAAATGCACTTGAATC\n+CAAGAGAAACGCAAAGCTTTTTCTCTTTACGATCAGAATCCTAAAGTCTAAAGTCCATAT\n+TAGAAAAGCTCGATACCGAGGCTTGAACGTCAACCAAATCAGAATAATTATCAGAGTTCA\n+GTTTGAGACCTAATTGTAAAAGGTTCGGTGTTCTTCTCAAATAAAAAGATTGTAATCATT\n+TAGTGAAATAAAAATTATATTTTTTTCACTTATAAATATTGCAAGTATTTAATTGGCGCA\n+GTCGGTTAGGATCCAATAAAATAAAAGAGTCCTTTTAGTACGGTACTGATCAACTGAAGG\n+ATATGCTATACGACTAGCTATCCAAGATCAGCGAATTAAAATAGTGATTCAAAAATATTT\n+TTTAATCCGCAAAAGAATCTACGTGAAAGTAGTATTCAAAATAAAATCCCGTGCGGTCGG\n+AAACAAAAATTAATTTAAATTTTTTAATTCCGAAACTTAAAACCAAGTTTAAAGAAAACT\n+TAAAATCAAGAAAACTTAAAACCAAGTTTAAAGAAAACTTAAAATCAAGAAAACTTAAAA\n+CCAAGTTTAAAGAAAACTTAAAATCAAGAAAACTTAAAACCAAGTTTAAAGAAAACTCAA\n+AATCAAGAAAACTTAAAGCCAAAATAAGCTAGAAAACTAAAAGACATCATGGCAGTCCCA\n+CAACTCTCAGAAACACACCTAAACCAACTGCTAAACCAAATCAAAGAATTAAACTACTAC\n+GATGGCGCACCTGGCAAATTATCTGGATTCGTCAACCAAGTGGAACAACTGCTCAGTTTA\n+TACCCAACACAGGAAGCAAGACAGGCACACGTCATATATGGAGCAGTGAAGCGGTTATTA\n+GTGGATTCAGCCTTAGAAGTCGTAACCCAGGAAAGAGCTAACACATGGCTGGACATGAAG\n+AAAGCACTGGCAATGGCATTCAAAGACCATAGACCTTATGTAACTCTCATCAGACAATTA\n+GAAGACATATCATACCCAGGAAGTATCTGTAAGTTTATAGAAAAATTAGAAACACAATAC\n+TGGATTATGTTCGATAAGTTAGAATTAGAAAGTGACCATGTTGATAAATCGAATTATACC\n+GAAATGTTAAACAAAACTGTTAAATCAGTAATAGATCGAAAACTGCCGGATAGAATTTAT\n+ATGTCTTTGGCACGTAAAGATATTGATACAATTTATAAATTAAAACAAGCATCAATGGAA\n+TTAGGCCTTTATGATGCTATTCCAGAAAATCACCGTTCTAATAGAACAGAAATGAATAAA\n+CGTAGGAACAGGGGAAACTATAATCAAAATAATAATCAAAAATATTACAATAATAGAAAT\n+CACAACTACAGTAATTATTATCCTAGCATGAATCAGAATCATAATACACAACCACCTCAG\n+AATCCGACTCAACCTATGACAAATCAAAACCAATATTCACCGCGTTTCATACCGAATAAT\n+CAAAGAGGGAATTATTATGCATTTAGACGAGACTTAACACAAGCTCAGCAGAACAACCCA\n+C
TTAATAACACCCTTAACTTCCAACCTTCGACATCGAATAATATTAACAGACAAGGGCCA\n+GTAAAAAGACAACGCGAGAGTCAGAGTGACCAAAGCAGGATGGATGTAAATTTTCATCAA\n+GCTGCCTCGGACACTCAAATGATAGAGAAGGACATACAAGTCCCTATGTAAAAATAATTC\n+ATCATAATAAAAATTATAAGGGAATGATCGATACAGGATCATCAATTAACATCATAAGAG\n+AAAATTTTGAGAACTTAGAAGAAAAGGAAGAAAACCTAATAGTATACACTATTAAAGGAC\n+CAATAACACTAAAGAGAAGTATAATAATAAAACCTACTTCAGTATGTCCGTCTGCTCAAA\n+AATTCTACATTCACAAATTTTCTGATAACTATGATTTCTTGTTAGGTCGAAAGTATTTAG\n+AAGATACAAAAGCTAAAATAGATTATGCTAACGAAACAGTAACACTAGGCTCAAAAGTAT\n+TTAAGTTTCTCTATGAAGAAAAGAAGGGCGAGACCGCATCCAAATGCCTTGACCCACAAG\n+AAAAGAATGATTCCGCTCTAGTGGACAGAACCAAACCAAAAATGCAAAAGGTTAAGACCG\n+CACCTAAGTGCCTTAAACCAAAGCATCAACAGCAGAAGAAAGAGACCGCATTACCCAAAT\n+GCCTCATTTCAAATGTTGTTAAAGACACAGTGGACAATGATGTAACACATCTCGATCCCA\n+TGTCCGTTGACAACGATATAGTCAACTTCGCGATTAACAATGAGTTACGCGAATGTAACG\n+AGTATAGACTCGAACACTTAAATGCAGAGGAAGTTGAATGTTTAAAGAAGTTCCTATACG\n+AATATAGAGACATTCAGTACAAAGAGGGCGAAAATTTGACCTTCACCAGTACTATTAAAC\n+ATGTCATCCAGACTCAACACGAAGACCCAGTATACCGTAAACCCTACAAGTACCCTCAAA\n+GCGTTGACCAAGAAGTTAACAAACAAATTAAAGAAATGATAGAACAAGGGATTGTTCGCA\n+AATCGAAGTCCCCTTATTGTTCTCCTATTTGGGTGGTCCCCAAGAAGGCAGACGCCTCTG\n+GGAAACAAAAATTCAGGTTGGTAGTCGATTACAGGAACCTAAATGAGATAACTGTTAACG\n+ACAAATTTCCCATTCCCCGAATGGATGAGATATTGGACAAACTAGGTAGATGCCAATACT\n+TTACCACTATAGATCTAGCCAAGGGTTTTCACCAAATCCAAATGGATGAAAATTCTATTG\n+CAAAAACAGCTTTTTCAACTAAGCATGGGCATTATGAATATACTCGTATGCCCTTTGGTT\n+TAAAAAACGCTCCAGCTACTTTTCAGAGATGCATGAATAATCTTCTGGAAGATTTAATCT\n+ACAAAGACTGTTTAGTCTATTTAGACGATATTATTGTTTATTCCACTCCATTGGAAGAAC\n+ACATTTTATCCCTAAAGAAAGTCTTTGAAAAACTGAGAGACGCTAATTTAAAGTTGCAAC\n+TAGATAAATGTGAATTCATGAAGAAAGAAACTGAATTCCTAGGACACATCGTCACAACAA\n+ATGGCATCAAACCAAATCCAAATAAAACTAAAGCAATTACAAATTTTCCATTACCCAAGA\n+CACCTAAGCAAATAAAATCATTTTTGGGATTATGTGGATTCTATCGCAAGTTTATTCCTA\n+ACTTTGCCAAAATAGTTAAACCCATGACCCTCAAATTAAAGAAAGGTGCTATAATAGACA\n+CCAAATGTAAAGAATACATCGAATCATTTGAAAAATTAAAAGTTTTGATAACTTCAGACC\n+CGATATTAATCTATCCTGATTTTTCAAAACCTTTTTCTTTGACAACTGATGCTAGCAACG\n+TAGCTATTGGTGCAGTGTTATCACAAAATCACAAGCCAGTTTGTTATG
CCAGTAGAACGC\n+TAAACGAACATGAAATCAACTATGCTACGATTGAAAAAGAATTGTTAGCTA'..b'CCC\n+GTTCCAGAAAATTCTACCAGCAAACGAGCTCGCCTACACCCAAATGTTAAGTGCCACTTT\n+TGTGGAAAAATTGGCCACAAGATAGCTGACTGCCGCTCCATGAAAAACAACTTAAAGAAT\n+CAACAAGGATCTAGTTCGAGTATTGGGCGCTTATCTGACTCTAAACCTGGGTCAATTACT\n+TGCTATAGATGTGGAAACCAGGGGCATATAGCGTCAGCTTGCCCTGCAAGACAATCGTTG\n+TCAAACCAAACTAAAGCCGACGAGAAGCGTGTCAACGTGTGTCACGTAGTCGAGCCAATT\n+GGGACATTGATATCATCTGGTGAGTCGTATCCATTTTATTTCGACTCTGGAGCCGAATGC\n+TCACTTGTAAGAGAATCTGTGTCCACCCAACTCTCGGGCACACGAATTAACAACAATGTA\n+GTTTTAAAGGGTATCGGAAATAATACTGTTACCAGTACATTACAAATTTTGTCAAACGTA\n+ACAATAAGTGGTTACTGTCTCGAAGTGCTTTTTCACGTAATTCTTAATGATTGCATTAAT\n+TATAATATTATAATTGGACGCGAAATTTTAAGTCAGGGATTTAGTGCTACTATAACAATA\n+GATAAAATAGAGTTATGTAAAACAAGGTCTGTGCAAACCCTATCTGCTTAGAGTAGTAGT\n+TTTAGTCTTGAAAATGTTAATACCGAATTGTGTGGCGAGGATAGGAAAATCTTGGTAAAT\n+CTTTTGAATAAATTCTGTGACTCATTTATAGACGGTTTTCCCAAAAATCGTGTTACAACT\n+GGCGAACTAGAAGTACGCTTAATTGATCCAATAAAAACTGTACACAGACGACCGTACCGA\n+CTTAGTATAGAGGAAAAACAAATTGTCCGAAACAAGGTTAATGAGCTGCTGTTAGATAAC\n+ATCATCCGTCCTAGCAGCTCACCGTTCGCCAGTCCAGTTTTACTCGTTAAAAAGAAAAAT\n+GGTTCTGATCGCCTTTGCGTGGATTACCGCGAACTAAATACAAACACAGTTGCAGAGAAA\n+TATCCCTTACCACTAATTAGTGACCAAATATCTAGGTTGCGTGGAGCAAGTTTCTTTAGT\n+TGCTTGGATATGGCCAGCGGGTTTCATCAGATACCTATTCACGCAAATTCAATTGAGCGC\n+ACGGCTTTTGTGACACCTGACGGCCAATTCGAATTTCTAACTATGCCCTTCGGGTTAAAG\n+AATGCCCCATCCGTGTTCCAGCGTGCAGTTATGAAAGCTTTGGGTGAGCTTGCCCACTCT\n+TACGTTATCGTTTATATGGACGATATAATGATTATCGCAGAAACAAAAGAAGAAGCTTTT\n+GTAAGGTTAAGGACAGTTTTGAAAATATTATCGCAGGCTGGGTTTTCTTTTAATATCGGA\n+AAATGTTCATTCCTGAAATCTTGCATTGAATATCTGGGGTTTGTGGTAAAAGAGGGCGAA\n+ATAAGACCAAATCCATCTAAGATAAAAGCATTAGTCGCTTTACCGCCTCCGCAGTCTGTT\n+ACCCAAGTAAGACAAATTATTGGCCTAGCCTCTTATTTTAGGCAGTTTGTGCCAAAGTTT\n+TCAGAAATCATGAAACCCTTATATAGACTGACCTGCAAAAACAAAATATTTGAATGGAAA\n+CTTGAACACGAACAAATTCGTCAAAAAGTCACTAAATTGCTTACAGATGAGCCCGTCCTT\n+GTTATCTTCGATCCTCGGCATCCCATTGAACTGCATACAGATGCCAGTATGGATGGCTAC\n+GGAGCAATTCTACTCCACAAAATAGATAATAAACGTCGTGTAGTTGAGTATTACAGCAAA\n+CAAACATCCTTGACGGAATCTCGATATCATTCG
TACGAGCTTGAAACTTTAGCTGTGTAT\n+AACTCCATGAGACACTTTCGTCACTATTTACATGGGCGAATTTGTTGTTTTTACAGACTG\n+TAATTCCCTAAAAGCTACTCGCAACAAGACTGAACTAACGCCGAGAGTACACCGTTGGTG\n+GGCATATATGCAGTCCTTCGACTTTGACTAGAATGACTTAGACTTAGAATATAGACCTGG\n+TGCCATAATGGCACATGTTGATTTCTTGTCACGCAATCCACTGCCATCTGCTCGGGTTAT\n+TACTGGTGAGGAAGAAAAACATGTTCTATTGGCCAAAATAACGGACAACTGGTTACTTGC\n+AGAACAGCAAAAGGATTCAGAGATTTCCACGATTGTTGTTAAAATACAGAACAATGAATT\n+GGGTGAGAGCTCGGCAAAAAGTTATGAATTACGCTCGAAAATGCTTTTTCGCAAAATTCA\n+AAGGAACGGTAAAACTCGTTGCCTGCCAGTTGCCCCCAGATCATTCAGATGGTCAGTAGT\n+GAACCAGGTCCATGAAGCAGTTGTACATTTGGGTGGGAAAAGACTTTAGACAAAATGTAC\n+GAATTTTACTGGTTTGAGAACATGGCCAAATATGTTCGTAAGTTCGTTGATAATTGCATT\n+ACGTGTAAGTTAACTAAGCCTCCGTCAGGAAAATTGCCAATCGAACTCCACCCCATACCA\n+AAAGTAGAAATTCCATGGCTATAAGTTGTACGACAAATCGCATAACGAAAGCCAGTCCTC\n+TTGAATTACTAATCGGAAAAGAATGTAGACCATTTAATATGTTACCAATATGTGAACAAG\n+TTAATAAAGTCGATGTAAATATTATAAGAAATATCGCGAGAGAAAATATTAAGAAGAACG\n+CCTTGTATGAAAAAACTAGATTCGATAAGCACAAAGCCAAATTTGATAACTTTGGTGTTG\n+GCGATTATGTTTTACTTAAGAACGAAGAAAGGCACCAAACAAAATTAGACCAAAAATATA\n+AAGGACCTTTCCTCGTGACAGAGGTACTTAAGGGAGATCGTTATATTTTAAAATCTTTAA\n+CTAATAAGCGGACTTATAAGTACCCACATGAAGCTTTGCGCAGTATGCCAACAGAGGAGA\n+TCCCCAAAGAGTTAGATCTATGTGACGATCAAGAAAACGTTGAAAGAGACGTTAGAAATC\n+CCTTGGTGGATTCCAATGTGGATGAAAACGTCGAAAGAGACGTTAGAAATCCCTTGGTGG\n+ATTCCAATGGGGATGAAAACGTTGAAAGAGACGTTAGAAATCCGTTGGTGGATGCCAATG\n+TGAGCGAAAAGTTACTGAGTTGTTTGAAGACTCAAGTGAATGAGAGGCATTGATGGATTT\n+CAATGCGAGATTGGGGACACATGCAACGTCGCCAAGTTGCCAGTGCTAGTAGGTACAAGT\n+GTTACTGTGTTGACTTATTTGATGTCTGGTGACTGGCGGCGTGGCGGGTTGAATTGTCCT\n+AGTGTGTTGCTAATAATAACAAACGATCTTCTTGGTACTTCTGTCACTCGAGTTGGTCGA\n+TAACAAGAAAAATAATAATAATAATTACGTTTAATGTTATCTTTCTAGATTAAGCTTGTT\n+TAATTTCAAAACTTATATTACACACGAGGACGTGTGCTGGTCAGGAAGGCCGTGTCGCAT\n+CATTATTAGTCTTATTTTTATTTTCTATGTTCCATCTCTAATAAACATGTCATCTCTATT\n+AAATAAAATTCGTATCGAGCTGTTCTTGTCTTCGTTTCTCTTTGATCGCTGTTCGCTGTG\n+TTCCGTTATGCGAGTTTAACGGGTTTTGCTCTGTTCTACATAGTCTCGGTTCGACGATGC\n+GTTAGAGTGAGACAAATGCTTGTCCTGTGGTGAGTTCGGACCAGCATGTATCAAGCGAGA\n+TAGAGCGATGTTGAAAT
GTACACGGGGCACTTATGTTTGAAAACTCTGAGAAAGCGGACG\n+CGTGAATATGTCGCAACCGAGGAAGTGTACGACTCGCGGGCGGAGCGCGGCAACAGAGGA\n+CCCCGAATCAGTTAACTTCCCGACA\n'
b
diff -r 000000000000 -r 0528fced93a9 test-data/dme_miR21_hairpin.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dme_miR21_hairpin.fa Wed May 27 17:31:35 2015 -0400
b
b'@@ -0,0 +1,774 @@\n+>dme-mir-1 MI0000116 Drosophila melanogaster miR-1 stem-loop\n+TTCAGCCTTTGAGAGTTCCATGCTTCCTTGCATTCAATAGTTATATTCAAGCATATGGAA\n+TGTAAAGAAGTATGGAGCGAAATCTGGCGAG\n+>dme-mir-2a-1 MI0000117 Drosophila melanogaster miR-2a-1 stem-loop\n+GCTGGGCTCTCAAAGTGGTTGTGAAATGCATTTCCGCTTTGCGCGGCATATCACAGCCAG\n+CTTTGATGAGCTTAGC\n+>dme-mir-2a-2 MI0000118 Drosophila melanogaster miR-2a-2 stem-loop\n+ATCTAAGCCTCATCAAGTGGTTGTGATATGGATACCCAACGCATATCACAGCCAGCTTTG\n+ATGAGCTAGGAT\n+>dme-mir-2b-1 MI0000119 Drosophila melanogaster miR-2b-1 stem-loop\n+CTTCAACTGTCTTCAAAGTGGCAGTGACATGTTGTCAACAATATTCATATCACAGCCAGC\n+TTTGAGGAGCGTTGCGG\n+>dme-mir-2b-2 MI0000120 Drosophila melanogaster miR-2b-2 stem-loop\n+TTGTGTCATTCTTCAAAGTGGTTGTGAAATGTTTGCCTTTTTATGCCTATTCATATCACA\n+GCCAGCTTTGAGGAGCGACGCGA\n+>dme-mir-3 MI0000121 Drosophila melanogaster miR-3 stem-loop\n+GATCCTGGGATGCATCTTGTGCAGTTATGTTTCAATCTCACATCACTGGGCAAAGTGTGT\n+CTCAAGATC\n+>dme-mir-4 MI0000122 Drosophila melanogaster miR-4 stem-loop\n+TTGCAATTAGTTTCTTTGGTCGTCCAGCCTTAGGTGATTTTTCCGGTCATAAAGCTAGAC\n+AACCATTGAAGTTCGTTGTGG\n+>dme-mir-5 MI0000123 Drosophila melanogaster miR-5 stem-loop\n+GCTAAAAGGAACGATCGTTGTGATATGAGTTGTTTCCTAACATATCACAGTGATTTTCCT\n+TTATAACGC\n+>dme-mir-6-1 MI0000124 Drosophila melanogaster miR-6-1 stem-loop\n+TTTAATGTAGAGGGAATAGTTGCTGTGCTGTAAGTTAATATACCATATCTATATCACAGT\n+GGCTGTTCTTTTTGTACCTAAA\n+>dme-mir-6-2 MI0000125 Drosophila melanogaster miR-6-2 stem-loop\n+TAACCCAAGGGAACTTCTGCTGCTGATATATTATTGAAAAACTACTATATCACAGTGGCT\n+GTTCTTTTTGGTTG\n+>dme-mir-6-3 MI0000126 Drosophila melanogaster miR-6-3 stem-loop\n+CAAAAAGAAGGGAACGGTTGCTGATGATGTAGTTTGAAACTCTCACAATTTATATCACAG\n+TGGCTGTTCTTTTTTGTTTG\n+>dme-mir-7 MI0000127 Drosophila melanogaster miR-7 stem-loop\n+GAGTGCATTCCGTATGGAAGACTAGTGATTTTGTTGTTTGGTCTTTGGTAATAACAATAA\n+ATCCCTTGTCTTCTTACGGCGTGCATTT\n+>dme-mir-8 MI0000128 Drosophila melanogaster miR-8 stem-loop\n+AAGGACATCTGTTCACATCTTACCGGGCAGCATTAGATCCTTTTTATAACTCTAATACTG\n+TCAGGTAAAGATGTCGTCCGTGTCCTT\n+>dme-mir-9a 
MI0000129 Drosophila melanogaster miR-9a stem-loop\n+GCTATGTTGTCTTTGGTTATCTAGCTGTATGAGTGATAAATAACGTCATAAAGCTAGCTT\n+ACCGAAGTTAATATTAGC\n+>dme-mir-10 MI0000130 Drosophila melanogaster miR-10 stem-loop\n+CCACGTCTACCCTGTAGATCCGAATTTGTTTTATACTAGCTTTAAGGACAAATTCGGTTC\n+TAGAGAGGTTTGTGTGG\n+>dme-mir-11 MI0000131 Drosophila melanogaster miR-11 stem-loop\n+GCACTTGTCAAGAACTTTCTCTGTGACCCGCGTGTACTTAAAAGCCGCATCACAGTCTGA\n+GTTCTTGCTGAGTGC\n+>dme-mir-12 MI0000132 Drosophila melanogaster miR-12 stem-loop\n+TACGGTTGAGTATTACATCAGGTACTGGTGTGCCTTAAATCCAACAACCAGTACTTATGT\n+CATACTACGCCGTG\n+>dme-mir-13a MI0000133 Drosophila melanogaster miR-13a stem-loop\n+TACGTAACTCCTCAAAGGGTTGTGAAATGTCGACTATTATCTACTCATATCACAGCCATT\n+TTGATGAGTTTCGTG\n+>dme-mir-13b-1 MI0000134 Drosophila melanogaster miR-13b-1 stem-loop\n+CCATGTCGTTAAAATGTTTGTGAACTTATGTATTCACAATCATATCACAGCCATTTTGAC\n+GAGTTTGG\n+>dme-mir-13b-2 MI0000135 Drosophila melanogaster miR-13b-2 stem-loop\n+TATTAACGCGTCAAAATGACTGTGAGCTATGTGGATTTGACTTCATATCACAGCCATTTT\n+GACGAGTTTG\n+>dme-mir-14 MI0000136 Drosophila melanogaster miR-14 stem-loop\n+TGTGGGAGCGAGACGGGGACTCACTGTGCTTATTAAATAGTCAGTCTTTTTCTCTCTCCT\n+ATA\n+>dme-mir-263a MI0000343 Drosophila melanogaster miR-263a stem-loop\n+TAGATCTCGGCACAGTTAATGGCACTGGAAGAATTCACGGGGTAATTTTTATACAACCCG\n+TGATCTCTTAGTGGCATCTATGGTGCGAGAATAA\n+>dme-mir-184 MI0000354 Drosophila melanogaster miR-184 stem-loop\n+GGTTGGCCGGTGCATTCGTACCCTTATCATTCTCTCGCCCCGTGTGCACTTAAAGACAAC\n+TGGACGGAGAACTGATAAGGGCTCGTATCACCAATTCATC\n+>dme-mir-274 MI0000355 Drosophila melanogaster miR-274 stem-loop\n+TCCTGTGTTGCAGTTTCGTTTTGTGACCGACACTAACGGGTAATTGTTTGGCCGCCAGGA\n+TTACTCGTTTTTGCGATCACAAATTATGAAATTGCAGCAA\n+>dme-mir-275 MI0000356 Drosophila melanogaster miR-275 stem-loop\n+TGTAAAGTCTCCTACCTTGCGCGCTAATCAGTGACCGGGGCTGGTTTTTTATATACAGTC\n+AGGTACCTGAAGTAGCGCGCGTGGTGGCAGACATATAT\n+>dme-mir-92a MI0000357 Drosophila melanogaster miR-92a 
stem-loop\n+AATATGAATTTCCCGTAGGACGGGAAGGTGTCAACGTTTTGCATTTCGAATAAACATTGC\n+ACTTGTCCCGGCCTATGGGCGGTTTGTAATAAACA\n+>dme-mir-219 MI0000358 Drosophila melanogaster miR-219 stem-loop\n+TAATTCGATTTTTA'..b'A\n+GGAATACATGTGCCTGTATATATATTCGGTAAAATCACATCTTTGAATT\n+>dme-mir-4980 MI0017766 Drosophila melanogaster miR-4980 stem-loop\n+GAAGAGGCTGAGAGCGGGAAAGTGAGAAGTAGAGAGCGCAGATAGGGAGTGGGAGCTTAT\n+ACCGTTATAAAGCCAACTTCCGTTCTGCGTTTCGTTCCAACCCCCCACCTCTCTTTCCCT\n+CT\n+>dme-mir-4981 MI0017767 Drosophila melanogaster miR-4981 stem-loop\n+TGGCCACGTGCCCGCAAGACGCCTTCGGCCGGCGCAAAGTTTCAATTTGATATTCCTTGG\n+CCGGTCGAGCAGCGTCTTGGGGTTGTTCGTGGATT\n+>dme-mir-4982 MI0017768 Drosophila melanogaster miR-4982 stem-loop\n+CCAACTTTGCTGGCATTCGGTGGCCAATAATGCAATTGCATGTCCTTGTTGCTGTTGCAA\n+TGTTGCAGCTGGAATTGCAATTGCATTTCTAGTTGCCGCTGCTGCTGCTCGAGTGGCAA\n+>dme-mir-4983 MI0017769 Drosophila melanogaster miR-4983 stem-loop\n+GGCTGGCTGTCTTGACCCACTTGCTCGTTTGCATTTCTGAAAAATGTATACGCAAAAACC\n+AAAATCAGAAATTCTTTTGAGCAAGTGTGTCATGAAATTGCCATTCGT\n+>dme-mir-4984 MI0017770 Drosophila melanogaster miR-4984 stem-loop\n+CTTCGCTCGAGGTGTGAAAACCTTTTGGCCAGCGAATACGCCTCGGATCCAATCACGTTT\n+CAAGATCGAGGTGAATTCTTTGACGTATTCGCTGGCCAAATGACACGTTCGCCATGTTTT\n+GCG\n+>dme-mir-4985 MI0017771 Drosophila melanogaster miR-4985 stem-loop\n+TTGGGGGCGCTGCACTGGCATTGAAAAGTGAATTACATTGATCGTGACATGGGAATGGAA\n+AATGTCGCCGAGCCATGTAAATCGTTTGGCCTACCCGTCTA\n+>dme-mir-4986 MI0017772 Drosophila melanogaster miR-4986 stem-loop\n+TTCTGCCGCTTTTGCTGTGGCTTCTCTGCATGGGATTCCCCATTCTGCATGGCGCCGATC\n+TCTGCCAGCCCATCGGATGGCGGAACTTCCAGTGCAGCGAGGTCGCTTCTCTGCAGGATC\n+TG\n+>dme-mir-4987 MI0017773 Drosophila melanogaster miR-4987 stem-loop\n+GCGGTGCCGTTGATGATGACACAGCGCGCTTGCAACAGCGTGCGGCACGATTCTCACAGC\n+AGGGCTCCAGCTCGGCCAAAAAATCGGTCGTCGCCATTGCAAGCTCACCGTTTGGTCTCA\n+CCACGGCCAAGA\n+>dme-mir-9369 MI0028918 Drosophila melanogaster miR-9369 stem-loop\n+AGCGAGAGGGTATTATGTATTCATATTTGTAATATCATGATACATATTCTCCTTTCGCTC\n+TA\n+>dme-mir-9370 MI0028919 Drosophila melanogaster miR-9370 
stem-loop\n+TTTGGGTTGCCGGTACCGGGTGGTTTCCCCGATATCCACGACCCATACCA\n+>dme-mir-9371 MI0028920 Drosophila melanogaster miR-9371 stem-loop\n+AAATCACTTTGCTTGGAATTCACATTGATGTACGTAAGAATTCATCACGAAGTGGTTTCT\n+G\n+>dme-mir-9372 MI0028921 Drosophila melanogaster miR-9372 stem-loop\n+TAAAAGCACATTTCGCCAGCACGTGTACTTTCCACCTCGTCTGTGATTTTGGTTTTTAAC\n+G\n+>dme-mir-9373 MI0028922 Drosophila melanogaster miR-9373 stem-loop\n+AGTTTGTGAGGAGCGTTCGGCGGATACACATCGCACCCATCGCTCTTGGCCAGCTCGTC\n+>dme-mir-9374 MI0028923 Drosophila melanogaster miR-9374 stem-loop\n+CGTGCAATAATTTCCTCGATTGGCATCAAGTGGCTTCCAGTCGGGTACATATTGCACAAG\n+A\n+>dme-mir-9375 MI0028924 Drosophila melanogaster miR-9375 stem-loop\n+CCGAGTATATGGAATTTCTGTTTTGCCTAGTATGAACCGAAACTCGATATAATTCAGAA\n+>dme-mir-9376 MI0028925 Drosophila melanogaster miR-9376 stem-loop\n+ATAGCCAGCATGGCCAAAGCGCTGTGCTTAAGTGGTCCAAGCGACCGAAAGCACTTTGAC\n+CATGCGGTGGTGGCCA\n+>dme-mir-9377 MI0028926 Drosophila melanogaster miR-9377 stem-loop\n+CGCTCTTTCTCTCTATCTGGCCTGGCCTGGCTTGATGGGGAGAAGGAGAGGG\n+>dme-mir-9378 MI0028927 Drosophila melanogaster miR-9378 stem-loop\n+AGTGGAGTGAGACCTCGAGTTCGGGCAAATCTGATAGCCGAACGGGAGTTTCCTCCATTG\n+GG\n+>dme-mir-9379 MI0028928 Drosophila melanogaster miR-9379 stem-loop\n+CGCATGTCAGGTGGTAATCCATTAGTATAAACATGGTATGGCCACTTGACAGTCGGC\n+>dme-mir-9380 MI0028929 Drosophila melanogaster miR-9380 stem-loop\n+GCTCCTGCTGCATCTGTTGGCGATAGCGCTCGTCACTCTTGGCCGTGTACTCCGCCTCCA\n+ACAGATCCAGCAGCAGA\n+>dme-mir-9381 MI0028930 Drosophila melanogaster miR-9381 stem-loop\n+GCCAGGGAAGGGTCGAGTGCGGATCCCTTGCCCCAGCGGCTATTCGCCTGCGCACTCGGT\n+CCATCCCTGGCTCA\n+>dme-mir-9382 MI0028931 Drosophila melanogaster miR-9382 stem-loop\n+ATCACAGTGTGGCTGAGGGATTCCACTTTCCTTATAACTTTTGCCGATCCCCGGCGCCAC\n+TGTGATCGT\n+>dme-mir-9383 MI0028932 Drosophila melanogaster miR-9383 stem-loop\n+GGGTGCAGATCAAGTGCGAGCTGCGCATTCTCTCGCAGTTCGCCTTCAATCTGAACCCCG\n+A\n+>dme-mir-9384 MI0028933 Drosophila melanogaster miR-9384 
stem-loop\n+AGGCATAATTCAACTCACACGTCTACAGGTACATATGTGTGTGTTCGGTTATGTACTTTG\n+>dme-mir-9385 MI0028934 Drosophila melanogaster miR-9385 stem-loop\n+ACAGTGTATGCAAATGATGAATGTGCCATATCAGAAACTATTCCTCATTGCTATACCTGT\n+GGT\n+>dme-mir-9388 MI0028938 Drosophila melanogaster miR-9388 stem-loop\n+CAAGTATTTTGGTACGTATGTATGTATGTACATACATGTGTATATGGTACATTCATGTAG\n+TACATACATACATATGTATGCAAGTACGTACG\n'
b
diff -r 000000000000 -r 0528fced93a9 test-data/dmel-2L-r6.04.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dmel-2L-r6.04.fasta Wed May 27 17:31:35 2015 -0400
b
b'@@ -0,0 +1,293924 @@\n+>2L type=golden_path_region; loc=2L:1..23513712; ID=2L; dbxref=GB:AE014134,GB:AE014134,REFSEQ:NT_033779; MD5=b6a98b7c676bdaa11ec9521ed15aff2b; length=23513712; release=r6.04; species=Dmel;\n+CGACAATGCACGACAGAGGAAGCAGAACAGATATTTAGATTGCCTCTCATTTTCTCTCCCATATTATAGGGAGAAATATG\n+ATCGCGTATGCGAGAGTAGTGCCAACATATTGTGCTCTTTGATTTTTTGGCAACCCAAAATGGTGGCGGATGAACGAGAT\n+GATAATATATTCAAGTTGCCGCTAATCAGAAATAAATTCATTGCAACGTTAAATACAGCACAATATATGATCGCGTATGC\n+GAGAGTAGTGCCAACATATTGTGCTAATGAGTGCCTCTCGTTCTCTGTCTTATATTACCGCAAACCCAAAAAGACAATAC\n+ACGACAGAGAGAGAGAGCAGCGGAGATATTTAGATTGCCTATTAAATATGATCGCGTATGCGAGAGTAGTGCCAACATAT\n+TGTGCTCTCTATATAATGACTGCCTCTCATTCTGTCTTATTTTACCGCAAACCCAAATCGACAATGCACGACAGAGGAAG\n+CAGAACAGATATTTAGATTGCCTCTCATTTTCTCTCCCATATTATAGGGAGAAATATGATCGCGTATGCGAGAGTAGTGC\n+CAACATATTGTGCTCTTTGATTTTTTGGCAACCCAAAATGGTGGCGGATGAACGAGATGATAATATATTCAAGTTGCCGC\n+TAATCAGAAATAAATTCATTGCAACGTTAAATACAGCACAATATATGATCGCGTATGCGAGAGTAGTGCCAACATATTGT\n+GCTAATGAGTGCCTCTCGTTCTCTGTCTTATATTACCGCAAACCCAAAAAGACAATACACGACAGAGAGAGAGAGCAGCG\n+GAGATATTTAGATTGCCTATTAAATATGATCGCGTATGCGAGAGTAGTGCCAACATATTGTGCTCTCTATATAATGACTG\n+CCTCTCATTCTGTCTTATTTTACCGCAAACCCAAATCGACAATGCACGACAGAGGAAGCAGAACAGATATTTAGATTGCC\n+TCTCATTTTCTCTCCCATATTATAGGGAGAAATATGATCGCGTATGCGAGAGTAGTGCCAACATATTGTGCTCTTTGATT\n+TTTTGGCAACCCAAAATGGTGGCGGATGAACGAGATGATAATATATTCAAGTTGCCGCTAATCAGAAATAAATTCATTGC\n+AACGTTAAATACAGCACAATATATGATCGCGTATGCGAGAGTAGTGCCAACATATTGTGCTAATGAGTGCCTCTCGTTCT\n+CTGTCTTATATTACCGCAAACCCAAAAAGACAATACACGACAGAGAGAGAGAGCAGCGGAGATATTTAGATTGCCTATTA\n+AATATGATCGCGTATGCGAGAGTAGTGCCAACATATTGTGCTCTCTATATAATGACTGCCTCTCATTCTGTCTTATTTTA\n+CCGCAAACCCAAATCGACAATGCACGACAGAGGAAGCAGAACAGATATTTAGATTGCCTCTCATTTTCTCTCCCATATTA\n+TAGGGAGAAATATGATCGCGTATGCGAGAGTAGTGCCAACATATTGTGCTCTTTGATTTTTTGGCAACCCAAAATGGTGG\n+CGGATGAACGAGATGATAATATATTCAAGTTGCCGCTAATCAGAAATAAATTCATTGCAACGTTAAATACAGCACAATAT\n+ATGATCGCGTATGCGAGAGTAGTGCCAACATATTGTGCTAATGAGTGCCTCTCGTTCTCTGTCTTATATTACCGCAAACC\n+CAAAAAGACAATACACGACAGAGAGAGAGAGCAGCGGAGAT
ATTTAGATTGCCTATTAAATATGATCGCGTATGCGAGAG\n+TAGTGCCAACATATTGTGCTCTCTATATAATGACTGCCTCTCATTCTGTCTTATTTTACCGCAAACCCAAATCGACAATG\n+CACGACAGAGGAAGCAGAACAGATATTTAGATTGCCTCTCATTTTCTCTCCCATATTATAGGGAGAAATATGATCGCGTA\n+TGCGAGAGTAGTGCCAACATATTGTGCTCTTTGATTTTTTGGCAACCCAAAATGGTGGCGGATGAACGAGATGATAATAT\n+ATTCAAGTTGCCGCTAATCAGAAATAAATTCATTGCAACGTTAAATACAGCACAATATATGATCGCGTATGCGAGAGTAG\n+TGCCAACATATTGTGCTAATGAGTGCCTCTCGTTCTCTGTCTTATATTACCGCAAACCCAAAAAGACAATACACGACAGA\n+GAGAGAGAGCAGCGGAGATATTTAGATTGCCTATTAAATATGATCGCGTATGCGAGAGTAGTGCCAACATATTGTGCTCT\n+CTATATAATGACTGCCTCTCATTCTGTCTTATTTTACCGCAAACCCAAATCGACAATGCACGACAGAGGAAGCAGAACAG\n+ATATTTAGATTGCCTCTCATTTTCTCTCCCATATTATAGGGAGAAATATGATCGCGTATGCGAGAGTAGTGCCAACATAT\n+TGTGCTCTTTGATTTTTTGGCAACCCAAAATGGTGGCGGATGAACGAGATGATAATATATTCAAGTTGCCGCTAATCAGA\n+AATAAATTCATTGCAACGTTAAATACAGCACAATATATGATCGCGTATGCGAGAGTAGTGCCAACATATTGTGCTAATGA\n+GTGCCTCTCGTTCTCTGTCTTATATTACCGCAAACCCAAAAAGACAATACACGACAGAGAGAGAGAGCAGCGGAGATATT\n+TAGATTGCCTATTAAATATGATCGCGTATGCGAGAGTAGTGCCAACATATTGTGCTCTCTATATAATGACTGCCTCTCAT\n+TCTGTCTTATTTTACCGCAAACCCAAATCGACAATGCACGACAGAGGAAGCAGAACAGATATTTAGATTGCCTCTCATTT\n+TCTCTCCCATATTATAGGGAGAAATATGATCGCGTATGCGAGAGTAGTGCCAACATATTGTGCTCTTTGATTTTTTGGCA\n+ACCCAAAATGGTGGCGGATGAACGAGATGATAATATATTCAAGTTGCCGCTAATCAGAAATAAATTCATTGCAACGTTAA\n+ATACAGCACAATATATGATCGCGTATGCGAGAGTAGTGCCAACATATTGTGCTAATGAGTGCCTCTCGTTCTCTGTCTTA\n+TATTACCGCAAACCCAAAAAGACAATACACGACAGAGAGAGAGAGCAGCGGAGATATTTAGATTGCCTATTAAATATGAT\n+CGCGTATGCGAGAGTAGTGCCAACATATTGTGCTCTCTATATAATGACTGCCTCTCATTCTGTCTTATTTTACCGCAAAC\n+CCAAATCGACAATGCACGACAGAGGAAGCAGAACAGATATTTAGATTGCCTCTCATTTTCTCTCCCATATTATAGGGAGA\n+AATATGATCGCGTATGCGAGAGTAGTGCCAACATATTGTGCTCTTTGATTTTTTGGCAACCCAAAATGGTGGCGGATGAA\n+CGAGATGATAATATATTCAAGTTGCCGCTAATCAGAAATAAATTCATTGCAACGTTAAATACAGCACAATATATGATCGC\n+GTATGCGAGAGTAGTGCCAACATATTGTGCTAATGAGTGCCTCTCGTTCTCTGTCTTATATTACCGCAAACCCAAAAAGA\n+CAATACACGACAGAGAGAGAGAGCAGCGGAGATATTTAGATTGCCTATTAAATATGATCGCGTATGCGAGAGTAGTGCCA\n+ACATATTGTGCTCTCTATATAATGACTGCCTCTCATTCTGTCTTATTTT
ACCGCAAACCCAAATCGACAATGCACGACAG\n+AGGAAGCAGAACAGA'..b'AGGTCCCGGAAAGTGGGCCATCGGAG\n+GTAGTCGCCATCGAAGACTTCCGTGTCGCATGGAGGTAAGCGGCAGCCGGACGAAATTAGCGGCTGAGAGGGTGCTTGCG\n+CGACTTGAGGCGTTGCTCTGTCGATTGTTTCGCCAATTTGTGCTGCACATGACTCGTATACTGAGTAGCAGTAGTCATAT\n+TTGGCCTGGAGAATAGGCACTGTGTCGAGGGATCCTTCTTGGGCCATTAGGTCAGAGCATGTTTCGTACTCTCTTTCCAC\n+TTTGTCCCATAAGGCTCGCACCTGTTGCAGACGGACTTGTAACGTGTGTAGGGACGGAGAGGCTTGATCAGGAGTGTTGA\n+TCTTCGCTTCGAAGAGGCTTACGCGATCGCTGACGGCGATGAATTTATGCAACGCTGCGGTTGCGGGCGTTGGCTGCTCA\n+GAGGATGCCATTTTCTTTGTGGTAGAGCGTGTGACGCGGAGGAAATCAGTCCCGACAGCGGAGGGACTCTCGGATTTTCT\n+CGATAGAGAAGACTCACTAGACGCTCGCGTCACTGGTCGGGAAATGACCAACCTAGGAGTTGTCTTCGAACTGGTCGATG\n+GCAAAAACTTTGCTTTCGGAGTGGGAGTCGCGGTTTTGACCCTGGGACTAACAGACTTGGGTGCTGGCTTACCGCGAGTG\n+GATAGCGGAGATTGCGTGTTGAACTTTTGTTCTTTTCCAGGTGCGGGTGTCTTTTTCTTGTCTCCCTCTAGGGGCATGCT\n+CAGCTATGCCCTAGGAGAAGGAAGTCAAGAAGGCCTGCACGCCGCAAAAAATGTGGAATTGAAAAAGAACCAGACACAGA\n+GGGCACCCAAATCTGGATGGACAAAGGATCCCGTCGGAATTCGGGAGAAAGTTGCAATTGGGATCTGGAGATTGGAGCAC\n+GAAAGGAAAAAAATCCCAATTTTTGTGAAATAAGGCCCCAATAGATCTTCAATCAACTGGAAAAAAGCTTAAATGAAGCA\n+CAACATAGCTGAAATTCAGTGTGAAAAAATAGCTAGGAGGATTCACAAAAAAAGCTAGAGTGAAGTCCACAAGAAGATAT\n+AGCTATATTGGAGTCTAGAAAAACAATAGCTCAAATTTATATAAAAAAAAACTATGAATTGCAATTTCTGGAAAAAATAG\n+CTAAAAAAAAAATAGCTACCAGCTGGAGTATATATTTGAATTTATGGAAAAATAGCTAAATAATAGCTACCAGATGGTGT\n+CTGGGTAATAGCCAATAATTTATAAAAATGGAGAAACATATATATGAAATGTAATCTCGGAAGAAAAAGTCAGGGTTTAT\n+TTTATTCTGTATGTCGAGATTTGTGTTTTATTTTGAGCCAAGAGATAAAATTTGTTCTCAGTTCGGCTTCGATATTATTT\n+ATGATTTTTGGTAATATATTTCATTTGTGTGTTAAATTGGTTAAATCTATGGAGTGATTCTTATATATGTATGTGGATGT\n+AAATTTTATTGGAATATATGTGGATATGTATGGATATGTATATTTATGGGTATGTATAATTTTATGTGTATGTAAAATTT\n+TGCTGAATATATGTATATGTATATTTGATTTAATTTAATTGAATTTGTGGTTTGTATATTTTATTGCGGCAAAAAAACAG\n+CAGTTTAGCAGCGGACGATAATAGTGAATTTTGGACAGTGTTTAAAAAAATGTAAGTATATATATGTATATGCGCTTGGA\n+TACCAGCGGATCACCGTGAGACGAATTAGGGGGCCGGTATTGGCAAAGTGCTTGTTTATGCACTTAAAAAAAAGAAGTAA\n+TCACGGTGGCTGGGCGCATGTACTTATATATTTTCACTTAATTTTCACTTTTCACTTGCTGCCGTTGCGCCCTTCTGGCG\n+TGAACAGC
TGCTGAGTTCTATTCTTAAATCTATGTTTGAGGCCTATGACCGCGCTAATCACCTCCCGCGTGGTCATATCT\n+CTTGACACTTCGGCAATAGACCGCTGCAATCGTACTTATCTGTAGTTGCCACTTATGCTGTCCAGTGACATGTTTATTGC\n+AGCCCATAGTTAGAACATATGTTCTTTTCTCTCTATAGTTTCCCTACTTATCATATCTAAACACATATTTAGACAGCAAC\n+GTTATGAGGAAATCGGTGCTGTATATGATGGATCGCTGTTTGTTACTACAATTGTGCCATTAAAATTAGTACACAATTGA\n+CAAAAAATAATTTGTATCAACAGAGAGCTTATATTAGCAGAAAAAGAAAACTTGGATACCCATATATTTGCTTAAAATAA\n+ACAACATTAATATATTTGTAAAACATGTCTTTTAAATGACACTAATAGACGTAAAAAAATTTTTTTCTTTAGTAAGGTAA\n+ATATTTTTATGGCATATCGGCCAGATCGTGTATATGGCAGCTATATGAAAGTTGACCAAATCACTTGAAACTTTGTGAAC\n+CATCTTGGTGGAAGTAAAGAGTAACACAAACCAAATTTTGTGAAGATAGGTCATCATTTCGAATTTCTGCCAAAAAAAAA\n+AAAAACACATAAAAAACCACTGTGCGATGGTACCCAATTCTAAAAAAGAAAAAGAGATCAGAGCTTTCTTTTGATTAACC\n+GGTTATAATCGTAAATTTATCCCAAATTACGCACACAAATGCCAGTAATTTAGCCCTTGGGTCAGTACTTCCTCAAAATT\n+ATCATCCAATGCAGACGAGTTAAATATAACAGTTTACCATTCCTAGTGACCATCAACCTCTTAGATGGCTGAATAACTTA\n+AAGGAGTCGAATGCTAAGTTGCAAAGATGGAGAGCCAAATTAAACGAATATCAATTTAAAATCAATTATATTAAAGGAAA\n+GCAGAAATCAATTAAAAATCCGATAAAAATAAAGTAAAGTCAATTAAACAATACAGTAGAAATGCCAAACAAATTTTACT\n+TTTACAAGAGCGATGCAGATTTTTAAATTATTCAAAAATTAATAACTCCTGCACTACAACACTACAGAGAAGAGAAGAGA\n+AGAGAGAAGAGAGAATAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAAGAGAGAAGAGA\n+GAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAAAA\n+GAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAG\n+AGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGA\n+AGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGA\n+GAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAG\n+AAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAACAGAGAACAGAGAACAGAGAACAGAGAAC\n+AGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGA\n+GAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAAGAGAGAAGAGAGAAGAGAGAA\n+GAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAG\n+AGAACAGAGAACAGAG
AACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGA\n+ACAGAGAACAGAGAACAGAGAACAGAGAAGAG\n+\n'
b
diff -r 000000000000 -r 0528fced93a9 test-data/sample1.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample1.fa Wed May 27 17:31:35 2015 -0400
b
b'@@ -0,0 +1,50000 @@\n+>25001\n+TGGGTGTCCATCTTGTGACAGTACT\n+>25002\n+CGTGATGTGACGTAGAGGAA\n+>25003\n+TAAGACTATAATTGATGGAATGAACT\n+>25004\n+GATATGCTTCTCATGTAGGAATTGAGC\n+>25005\n+TCACTGGGCTTTGTTTATCTCA\n+>25006\n+TCTTGTAGGTGAGTCTCGGTGATCGA\n+>25007\n+TGAACACAGCTGGTGGTATCC\n+>25008\n+CGTCAATATGTTGCAGGAGTAAAGA\n+>25009\n+TTAATTGACTTTTAAATTGTTCTGGA\n+>25010\n+CCTGATATGTTGACGTAGTTGCTCGA\n+>25011\n+ATGTCATCGATGCGCAGATTTTTGG\n+>25012\n+ATTATTAAATCTTTGGATACCA\n+>25013\n+TCACTGGGCTTTGTTTATCTCA\n+>25014\n+GAACAGAATTCTTTGAAACGGGGTA\n+>25015\n+TATCACAGCCATTTTGACGAGTT\n+>25016\n+AACGGATAGAGCGAATTCTGAGTGGT\n+>25017\n+TGTGATGTGACGTAGTGGAACA\n+>25018\n+TGAAGATATTTATTAAATTAAAGTGG\n+>25019\n+ACTACATATGGTTGAGGGTTGTA\n+>25020\n+TAATTATCTTTGACCATTGTTTTGT\n+>25021\n+TCACTGGGCTTTGTTTATCTCA\n+>25022\n+TCACTGGGCTTTGTTTATCT\n+>25023\n+AGCGGACAACCTAGCCAGG\n+>25024\n+ATCGGTTGTTCAGTGGTAGAATGCT\n+>25025\n+GACAGCGAACTCAGGATTTGTGGA\n+>25026\n+ACCCGGGCGGGAACACCA\n+>25027\n+TGTGATGTGACGTAGTGGAAC\n+>25028\n+TCTTTGGTGATTTTAGCTGTAT\n+>25029\n+TACTGAAGGAATCTTCTTACATTTCCC\n+>25030\n+TTACGACAAGTCGAACAAAGCTGC\n+>25031\n+TGACTACTCCAGACACCTTGATATG\n+>25032\n+CTATCAGCCATCCTGAGC\n+>25033\n+TATCACAGCCATTTTGACGAGA\n+>25034\n+CGGTACTATTACTTTGAACA\n+>25035\n+ATTAATAGTATAGATACCA\n+>25036\n+ATATGAGGACTGGATGCACATTTTG\n+>25037\n+TAAGGTAAAGTAAGATTTCTCATTGGTG\n+>25038\n+AAACAAATTCTTTAATAAAACGA\n+>25039\n+TCACTGGGCTTTGTTTATCTCA\n+>25040\n+TAATGGACTTCGAAGTTGAAGCGGGC\n+>25041\n+TGTGATGTGACGTAGTGGAAC\n+>25042\n+TGGAAGACTAGTGATTTTGTTG\n+>25043\n+TTAGCTTTTCATTGTCGTGTAAGAGTT\n+>25044\n+GCTATTGATGCGAAGTCTTTGA\n+>25045\n+TTTAAGAAGACGGACTCGGTT\n+>25046\n+CGATTGATTTGCTCAGGATTGCTGA\n+>25047\n+CGGCGACTTCAAGTTCTGTGACG\n+>25048\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>25049\n+GCCTGAGAGCTCCGGAAGGACGTACGC\n+>25050\n+GGTTCTATTGTGTAATGG\n+>25051\n+ATCGGTGGTTCAGTGGTAGAATGCT\n+>25052\n+AAATCATCCGAACACATGGTCGAA\n+>25053\n+CACCGGAGCGTTGACCTGAGTTGGC\n+>25054\n+TTATTGTGCCACTGAGTCGACTATTAA\n+>25055\n+TTGTGTATTATGATTCTGATTCGTG\n+>25056\n+TCCGGCCAAATA
TGAACGACGTAAGG\n+>25057\n+TTGAGCTGTACTACGCTAATATGAGTG\n+>25058\n+TAATTGCATCTGACTTAGCACCACAAGA\n+>25059\n+GGGGATGTAGCTCAGTGGTAGAG\n+>25060\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>25061\n+TCACTGGGCTTTGTTTATCTC\n+>25062\n+TAAGATTGAAAATGACTGTGGAGT\n+>25063\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>25064\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>25065\n+AATTGTGGAACTTTCTTGCTAAA\n+>25066\n+TGTAAGCTCTGAAATAACATATT\n+>25067\n+TTGAAAAGCTTGCGCAGTTTCATGGGA\n+>25068\n+TGGTTTCCTTATTAAGTTTACGGTA\n+>25069\n+TCTTTGGTATTCTAGCTGTAGA\n+>25070\n+TACATACTGGCCAGCAGACCCAAGAA\n+>25071\n+TGATGCACAAACCTGGCAAGCCGGA\n+>25072\n+TGTGATGTGACGTAGTGGAACA\n+>25073\n+TATATTTCGGGTGTTTTGTGATTGTG\n+>25074\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>25075\n+TATCACAGCCATTTTGACGAGTT\n+>25076\n+TGCCGACCTGCCTGGGATTTGGGGTT\n+>25077\n+TCCTCGGATTTCGTTCTCTCAATT\n+>25078\n+TGTGATGTGACGTAGTGGAAC\n+>25079\n+GCATCGGTGGTTCAGTGG\n+>25080\n+TATTGCACATTCACCGGCCTGAAA\n+>25081\n+TGTGGCAATACGTTTGTTTAGGCGGC\n+>25082\n+TCTTTGGTGATTTTAGCTGTAT\n+>25083\n+TATCCACCCGAAATCAGTTTTTTGA\n+>25084\n+TCCGCAAATCCGTGAATCTGAATGACT\n+>25085\n+TAACGTACTCAGAACTGAACTAAT\n+>25086\n+TAACGGAGAGGCCTTACATAACGGGG\n+>25087\n+TTGGGGGTTGGACATAGTTAATGTT\n+>25088\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>25089\n+TGCTTGGACTACATATGGTTGAGGGTTGT\n+>25090\n+GGATCTAGTATACAAAATCGGAGGAGC\n+>25091\n+TTTTGTCTGAGGTTATTAAATATA\n+>25092\n+TCAAATGCAAATTGGATTGAGGAGA\n+>25093\n+TTTTTTTTGTTTTGAGGTTGATCAGA\n+>25094\n+TAAGTTGAGACCAAAAGTTAGATT\n+>25095\n+GTTCAATTCCCCGTCGGGGA\n+>25096\n+TTTTTCTGTTTATTTTCAATGAAACG\n+>25097\n+TTGGCGCTTCTCCTGGCGTGCCCGGTA\n+>25098\n+TAGTAGAAACAAACAGGCGACTCCATC\n+>25099\n+TGGACGGAGAACTGATAAGGGC\n+>25100\n+TGTGATGTGACGTAGTGGAACA\n+>25101\n+TCACTGGGCTTTGTTTATCTCA\n+>25102\n+TGAGTTTGTAGAGGAGTCCAGGAT\n+>25103\n+GATGGTATTTGATGATGAACTTGAA\n+>25104\n+TTGACTCCAACAAGTTCGCTC\n+>25105\n+GAGCGAATTCTGAGTGGT\n+>25106\n+CCTGCGTGATCTTGGCAACTCTGTTG\n+>25107\n+GGGAAAAACAATATCGTTTAGTGATT\n+>25108\n+TCTATTCCACTGGAACTTTGCACGTT\n+>25109\n+TAGATGTAATAGATTTGGTTTCCGA\n+>25110\n+TCTGATCTAAGAAATTGAGACGT
GGC\n+>25111\n+GCAGTCGTGGCCGAGCGGTTAAGGC\n+>25112\n+GCATCGGTGGTTCAGTGGTAGAATG\n+>25113\n+TCGATGCGGAGATTTTTGGACGGG\n+>25114\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>25115\n+ATCTCGGTAGAACCTCCA\n+>25116\n+AAATATCAGCTGGTAATTCTGGG\n+>25117\n+TAA'..b'CCACATGATTCGGCTT\n+>49886\n+ATACATTGTAGACGGTCTTACGGGA\n+>49887\n+TAAACTAATAGAGACAGGTAGAATC\n+>49888\n+TTGTTGCAATGTCTGACTGGGGTTCGT\n+>49889\n+TGCTTGGACTACACATGGTTGAGGGTTGTA\n+>49890\n+GGGTCAGGCGATGATGAATT\n+>49891\n+TGTGATGTGACGTAGTGGAACA\n+>49892\n+TTGGGATATTGTTGGAAATGATTTTT\n+>49893\n+TATTCGAGAATTTTGTGATTAGTGA\n+>49894\n+TTGTGGAATGTTCGTGTCGAA\n+>49895\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49896\n+TGGAAGACTAGTGATTTTGTTG\n+>49897\n+TGCGACGGCGGCGAACGTAGTA\n+>49898\n+TGTGATGTGACGTAGTGGAA\n+>49899\n+TGATGACGAGCGTTCTTTTAGA\n+>49900\n+TCAGGGTGACCACACATTTCAAGGA\n+>49901\n+TGGAATTTCGTTGTGTCGTCAGTGTGA\n+>49902\n+AAGGACCCGAGGGCTGCAACCTTTTC\n+>49903\n+TTAGATAACTGAAAGCAAGTACTGG\n+>49904\n+AAGAAGCCGTCGAGAGATATCGGA\n+>49905\n+TGTTATCGATCATTTTAGTTCGCTGA\n+>49906\n+TCCCATATTGTCTAGTGG\n+>49907\n+TACATGGTTGTCTTGTAGAGTTGACGC\n+>49908\n+TGAGCGGAGAACCAGAGTTGATGTG\n+>49909\n+TATAGGTCTGATTCTAAAATGGGTGA\n+>49910\n+GGACTGACTCGTGTAGTGTGCACT\n+>49911\n+TGTTTTTCGGCCTTATAAACGGGG\n+>49912\n+CCTAATAAGAATTGAGGGATCAGGA\n+>49913\n+TATTTGTGCTGCCTCCTCTGAAATCA\n+>49914\n+TGTTATGTTGCCAGTCTGAGTCGTCAG\n+>49915\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49916\n+CAATATCGTCAACATCCTCGAACG\n+>49917\n+TGTGATGTGACGTAGTGGAAC\n+>49918\n+TTTAAGCTGGTAGGTGTAGAAGCCT\n+>49919\n+TTCTTTCGGATTCTGAAGTAATTAAT\n+>49920\n+GAAGATGAAACTGTTCTGGACGGA\n+>49921\n+TCACTGGGCTTTGTTTATCTCA\n+>49922\n+TCAGGTACTTAGTGACTCTCAA\n+>49923\n+ATCGAAAAGATTCGCTGAAGTTGGGC\n+>49924\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49925\n+TAGATGTAAGAATAACTGTTTTGA\n+>49926\n+TAGCAACCAGGTCATCTTCAAACT\n+>49927\n+TTTATTGGAGAGGTTGATCCT\n+>49928\n+GAAGGGTTCGGGCTCAATTAGAGGGT\n+>49929\n+CATGGGTTCTGATGTGTTTTCACGA\n+>49930\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49931\n+TCCATTTTTGAACACAGTTTGATGGGA\n+>49932\n+TAACTGAAGTATCTGAGGATTGGATT\n+>49933\n+TTT
GAACTGGTCTGTGTTGAATTCAACC\n+>49934\n+GACGTTAGAAATCCGTTGGTGGA\n+>49935\n+TGCTTGGACTACATATGGTTGAGGG\n+>49936\n+TGGACTCGTTAGGTATGGATGTTGC\n+>49937\n+TGAGTCCCACAATACTGTATATA\n+>49938\n+TAAGACTATAATTGATGGAATGAACT\n+>49939\n+TGTTATACTCAGATACAGACGGTTCGA\n+>49940\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49941\n+TCTGAGTTTATTTTTAAAAGGTGTT\n+>49942\n+TCAGGTACTTAGTGACTCTCA\n+>49943\n+TCAGGATTGCTGAGCTGTGCGGTA\n+>49944\n+GGGGATGTAGCTCAGTGGA\n+>49945\n+AATATCATCTATTCTCGGTAGTGGA\n+>49946\n+GGTTCCATGGTGTAATGG\n+>49947\n+TGCGAGGTCAATGGTTGTAAAGTA\n+>49948\n+TCTTTGTAGTCGGTTGGCTGTACAGGT\n+>49949\n+GGCACTTGAACTTGGCACTGGACGC\n+>49950\n+GGGGACGTAGCTCAGTGGGG\n+>49951\n+TGCATAAGATATTCCGCCTCGAGAAGA\n+>49952\n+TGGACGGAGAACTGATAAGGG\n+>49953\n+TATTAACCAGACTGCAAATATACT\n+>49954\n+AACAATTAGGACCACGGCGATGGT\n+>49955\n+TTGGTGTAATCTTGGATCGGAGAC\n+>49956\n+CTAACTGTGAAAGGGGAATTGACCGGC\n+>49957\n+TGTAAAAGGGTGTCTCACTGCGGC\n+>49958\n+TTCTTATGGATTCTGAGTGATGTG\n+>49959\n+TTACAAAGTTGTAGATTGGTCGGGG\n+>49960\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49961\n+TAAATTGATTTAGTTTGAATTTAGA\n+>49962\n+TCTTTGGTATTCTAGCTGTAG\n+>49963\n+TAATACTGTCAGGTAAAGATGTCA\n+>49964\n+TATGCCGTAAGCTTGTTGCGCGCGGA\n+>49965\n+ACTTTCGGACTGATTGTTATGGTGG\n+>49966\n+TAATTGAGTACAGTTGGACATAGC\n+>49967\n+GCGGAGGTTGCGGTGCTGTACACTG\n+>49968\n+TTTCAGTTCGGGTAGGGTGGG\n+>49969\n+TGAGTACAAGCCCTTCTGGCGTGAAGA\n+>49970\n+TTTTGGAGCCACTTTCGCCTCGTAGGA\n+>49971\n+TCTATCCAAAGAGCTGATTGTCATACT\n+>49972\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49973\n+TTTGTGCAAATCAATATAAATTGA\n+>49974\n+TCGAATTGCTGAATGCCGAAGTAAAATA\n+>49975\n+AGATATGTTTGATATTCTTGGTTG\n+>49976\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49977\n+TCCCCCCCTAAGATTATGACCGTC\n+>49978\n+TTGCAGACGGACTTTTGAATTCACAGG\n+>49979\n+TGCTTGGACTACATATGGTTGAGGG\n+>49980\n+TGAGGTAGTAGGTTGTATAG\n+>49981\n+TGACGATGCCGCTGTAGAGCTTG\n+>49982\n+TGTTGTGTTGCGACAGTAGAGCGC\n+>49983\n+TACATGGCGACTTTCTTGCAACTGAACT\n+>49984\n+TGCAGGAAGCTTTCAAGGCAACAGA\n+>49985\n+TTAGGACGAGATTCGCTAATGCAAT\n+>49986\n+TAACATATGTGCAAGTTATTGGGA\n+>49987\n+TGTGATGTGACGTAGTG
GAA\n+>49988\n+AACTTCTTCTATCTTTGTGCGGGA\n+>49989\n+TATTTGGGTCACCGGGTTAAGTAGCGC\n+>49990\n+ATATTGTCTAGTGGTTAGG\n+>49991\n+TGAGGTAGTAGGTTGTATAGT\n+>49992\n+TCTTGGACTGAGCAGCTACTGTTTG\n+>49993\n+GCTCTCTTGAGTGGATTGCGCATGGA\n+>49994\n+TGATCTGGGGTGCATGGTAATCGG\n+>49995\n+AATGGCACTGGAAGAATTCAC\n+>49996\n+CGGGAAACTATGGATCAAATG\n+>49997\n+ATCTGCCTGAGTCGACTGTTCCGTAA\n+>49998\n+TTTGAGCAGCGAATCTGGAACGGT\n+>49999\n+TCGAAGACTAGACGGATTTTTCCCGGCT\n+>50000\n+TATTTAGAAAAACAGGTGAGTGA\n'
b
diff -r 000000000000 -r 0528fced93a9 test-data/sample2.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample2.fa Wed May 27 17:31:35 2015 -0400
b
b'@@ -0,0 +1,50000 @@\n+>25001\n+ATGTGATGTGACGTAGTGGAA\n+>25002\n+CAATTTGTGGAGCCTCTGTGTGC\n+>25003\n+TGAGTATTACATCAGGTACTGG\n+>25004\n+TGTTAGTAAGCTTATCGGTCTATA\n+>25005\n+GGGGATGTAGCTCAGATGGTAGAGCCT\n+>25006\n+TCAGAAAATGTAGATTCAATTCGATGT\n+>25007\n+TACTATGGTTGGAATTTCTATCTGG\n+>25008\n+GATTGAGATAGTCGGGCTTGAT\n+>25009\n+TTGAGCAAAATTTCAGGTGTGTGA\n+>25010\n+CAATCTGTTTATGAGGAATCAAAA\n+>25011\n+GAAACAGTTCTGACGTGCA\n+>25012\n+TAAGCGTGATTGTCCGGGCAAAAGGAA\n+>25013\n+TTCTATTGTTGCCTGAAGAATAAGGT\n+>25014\n+TGATTGGGAAACAATAACATGGTT\n+>25015\n+CATTAACTCGCATTGATTACGTCC\n+>25016\n+TGTGATGTGACGTAGTGGAAC\n+>25017\n+TAATGGAACACTGATTATACACTTC\n+>25018\n+ACTACTGTGGAGACATTTTTT\n+>25019\n+CGTGATGTGACGTAGTGGAAC\n+>25020\n+TGAAATCAGGGCAACTCAAAATTGGG\n+>25021\n+TGGACGGAGAACTGATAAGG\n+>25022\n+ATCGATAGTATCGTTTTTTGTACGTA\n+>25023\n+TCTAGAGCTGTTAAATTTGAAACAGTG\n+>25024\n+TGATTGTTTGATGACCGAAACT\n+>25025\n+CTGTAGCATCTCGTGGGTCGGAC\n+>25026\n+TGACTAAAAGTCGCTTGTTTTGGAA\n+>25027\n+TGTGATGTGACGTAGTGGAACA\n+>25028\n+TGTGATGTGACGTAGTGGAACA\n+>25029\n+CGGATAGAGCGAATTCTGAGTGGT\n+>25030\n+TTAAGTTCTGCATCCGTATGAGTGG\n+>25031\n+TCAGAAAATGTTGGTAACTCTTACACT\n+>25032\n+TGTGATGTGACGTAGTGGAA\n+>25033\n+TCAGGTACTTAGTGACTCTCAA\n+>25034\n+TTTCTAATTTGCCTTTCTGTAGTGG\n+>25035\n+TCAAGTTTGACTGGTCGTGTTGA\n+>25036\n+TCTGTCCTTGATTGTCAAC\n+>25037\n+ATAATACGTTTCGAACTAATGAA\n+>25038\n+TGTGATGTGACGTAGTGGAAC\n+>25039\n+ATCCAGTTCAATGTACTAATTGCCAAGA\n+>25040\n+TGCAAAACAAGAATTTCTCGCATGGTG\n+>25041\n+TGGACGGAGAACTGATAAGGGC\n+>25042\n+AGGCCCTGTAATTGGAATGAGTAC\n+>25043\n+TGAACACAGCTGGTGGTATCC\n+>25044\n+TTCAAGTTTTGGCTAGAACCCTGGTGA\n+>25045\n+TTTTTGTTTCCGACCGACCGCACGGGA\n+>25046\n+TGTTGAAAGCGTTCCTTACGTCTAGA\n+>25047\n+TGTGATGTGACGTAGTGGAAC\n+>25048\n+ATACTTGGACTGGATCCGCGACA\n+>25049\n+TAGATGCAATTCAAGCACTTCAAGGGA\n+>25050\n+TAACATCTTGGATCGATATGAATTG\n+>25051\n+TCTTTGGTATTCTAGCTGTAGA\n+>25052\n+TGCATCGGTTCCGTCTGTCCG\n+>25053\n+TGTGATGTGACGTAGTGGAACAT\n+>25054\n+TCTGTGCAACTGTTTACAAATTTGGA\n+>25055\n+TAGCGTAATGAGTTCGGCTCATGATC\n+>25056\n+TA
GATGTAAGAATAACTGTTTTGAGC\n+>25057\n+TCATTGGATGGTCAAGATGTCGTAA\n+>25058\n+ATCGGTGGTTCAGTGGTAGAATGCT\n+>25059\n+TATTATCAAAAGCTTCTTGAGCAAGA\n+>25060\n+GGGGATGTAGCTCAGATGG\n+>25061\n+TGGAATGTAAAGAAGTATGGAG\n+>25062\n+TTATTATTATTAAGATGGGCTTGTAG\n+>25063\n+TATCACGTCTGGGGAAATCAAACTGC\n+>25064\n+GACGAAATTTGTCGCACATATTCCCT\n+>25065\n+TCAGGTACTTAGTGACTCTCAA\n+>25066\n+TGTGATGTGACGTAGTGGAAC\n+>25067\n+TTGCAGCAAGCTAGAATGAA\n+>25068\n+TCAAAGATTGTCTGGAGGTGCTT\n+>25069\n+TGTGATGTGACGTAGTGGAAC\n+>25070\n+TAAGGAAATAGTAGCCGTGATT\n+>25071\n+TGTGATGTGACGTAGTGGAACA\n+>25072\n+CTAGAGATTCTGTTCAACTGGT\n+>25073\n+GCGCATTCGAGTCGTCGAGATCCGTA\n+>25074\n+AAAAGATGCGGAATGTCATAAAACA\n+>25075\n+ATATGAACAAAGCAAAGACACTAG\n+>25076\n+TGTGATGTGACGTAGTGGAACA\n+>25077\n+CGAGTTACGAAGAGAACAGTTGATGC\n+>25078\n+TAGTATTTCTGGGCTGCCATCAGT\n+>25079\n+TGAGTATTACATCAGGTACTGGT\n+>25080\n+TAAAGTTGTGGCGTAAAAACT\n+>25081\n+TGTGATGTGACGTAGTGGAAC\n+>25082\n+TGCTGATAACGCGAAGGTCGCGGG\n+>25083\n+AGGACCCTAAACGAACATGAAATT\n+>25084\n+TGGACGGAGAACTGATAAGGG\n+>25085\n+ATACGGACGATTTTGAACTCT\n+>25086\n+TCCCGATCTGTGGCAGCATTCCGAAG\n+>25087\n+TGTTCGAGCGAACAGTGGTAT\n+>25088\n+TGTGATGTGACGTAGTGGAACA\n+>25089\n+GATTTTCGTGTGAGGCGAGCT\n+>25090\n+TGTGATGTGACGTAGTGGAACAA\n+>25091\n+AAGAAGAAAATCTGAAGATGGATCC\n+>25092\n+TGACTAGATCCACACTCAT\n+>25093\n+AATGGCACTGGAAGAATTCACGGG\n+>25094\n+TTAATACCTCGAGTACGTCTGCT\n+>25095\n+TTTTATTAAGCTTTCTTCTAATCCATA\n+>25096\n+AGAGTTTGCTAAGCCACATGAGGGA\n+>25097\n+CATGCTTGGGATTGTGAACTGAT\n+>25098\n+TGGACGGAGAACTGATAAGGGC\n+>25099\n+CAGGACGGTGATCATGGAAGT\n+>25100\n+TATTATGATGATTAAGGATA\n+>25101\n+TGTGATGTGACGTAGTGGAACA\n+>25102\n+GATAAACACAAGCAAGAGGAAAGG\n+>25103\n+TAGTATGACATACTCTGATGCAAGAAT\n+>25104\n+ACTGGAGCACTCCAAGGTGAATGGC\n+>25105\n+GTCCTCTAAAGTCTGCCGCCGGA\n+>25106\n+TTTCAAGGTCGAGCATCATGGC\n+>25107\n+TGTATAGATATTAGAGAATATGTT\n+>25108\n+TGAGATCATTTTGAAAGCTGATT\n+>25109\n+TGTGATGTGACGTAGTGGAA\n+>25110\n+AACGGATAGAGCGAATTCTGAGTGG\n+>25111\n+TTTTCTGTGTCGGTTTTTGCACCTCC\n+>25112\n+TGGGGGACCGCTTGGGAACACC\n+>25113
\n+TGTGATGTGACGTAGTGGAA\n+>25114\n+TCGCATCCTACCGGCTGCGCCA\n+>25115\n+TTTATCTGGCCGATCAAACAAATCGGA\n+>25116\n+TGGCAAGATGTCGGCATAGCT\n+>25117\n+GAAACTCTAATAAGATACCCAAAAAGG\n+>25118\n+TGTGATGTGACGTAGTGGAA\n+>25119\n+AATGCACTGGAAGAATTCACGGG\n+>2'..b'9880\n+GTATATCAGATGTTGATTAAGTCGGA\n+>49881\n+CCGGCTAGCTCAGTCGGTAGAGCATGA\n+>49882\n+ATGTACTTCATGACAACCGCGTCGAGT\n+>49883\n+TAAACTTTGATAATGGACTTTTATT\n+>49884\n+TAATACTGTCAGGGAAAGATGT\n+>49885\n+TGTGATGTGACGTAGTGGAA\n+>49886\n+TGAGAGTAGACTTATTGGTCTGTAAGA\n+>49887\n+TGTGATGTGACGTAGTGGAAC\n+>49888\n+TGTGATGTGACGTAGTGGAAC\n+>49889\n+TTGATGACTGGAAACGCTTCGTGG\n+>49890\n+AATGGCACTGGAAGAATTCAC\n+>49891\n+TGTGATGTGACGTAGTGGAAC\n+>49892\n+TCTAGATCCGCAAGGACTGATCAGGGC\n+>49893\n+CAGAACTTGAAGTCGTCGAGGCA\n+>49894\n+TCTTTGGTATTCTAGCTGTA\n+>49895\n+TGGAATGTAAAGAAGTATGGAG\n+>49896\n+TGCAGAAGAGTTTGGCACGACC\n+>49897\n+ATGTGATGTGACGTAGTGGAA\n+>49898\n+ATAGGGGCGAAAGACCAATCGAA\n+>49899\n+TATTAAGGCAAGGAACTGCAATGC\n+>49900\n+AAATGGTCATATAGATGTAAGA\n+>49901\n+TAATCCTTTATGATCCGATCGTA\n+>49902\n+TGGGATTAGTTTTTTAGCTA\n+>49903\n+TGTGATGTGACGTAGTGGAA\n+>49904\n+TCTCTTTGAGAATAGAGATATCTGCA\n+>49905\n+TTCGTTGTCTCGTAATGTTCGAGA\n+>49906\n+TCCCATATTGTCTAGTGGTTAGGA\n+>49907\n+TGTAAACAGGGTGTAGAGGATGG\n+>49908\n+CAGTCGGTAGAGCATGAG\n+>49909\n+GTCTAATTAAAACAAAGCATTGTGA\n+>49910\n+TAAAATAATCGTAACAATGGAATACA\n+>49911\n+CGTGATGTGACGTAGTGGAA\n+>49912\n+TCTTTGGTATTCTAGCTGTAG\n+>49913\n+GATAGCTCTTTCTCGAAT\n+>49914\n+TAAAGTTAGTGCCAAGATGGGAGA\n+>49915\n+CACAAAGTTATTCTGCTACCGATGGAG\n+>49916\n+TGGACGGAGAACTGATAAGGGC\n+>49917\n+CAGGGGAAACCCTGATGGA\n+>49918\n+TAATCTCAATTTGTAAATGTGA\n+>49919\n+TCACGAATGCCGTCGAACCAATTGTT\n+>49920\n+TGAAGTGGAATACACAATTATGTCA\n+>49921\n+AATGGCACTGGAAGAATTCACGGG\n+>49922\n+TGTAAGGGTACTGTGGAATCTTTC\n+>49923\n+TGTGATGTGACGTAGTGGAA\n+>49924\n+TCCTAAAAATTCTACCTGCGTGT\n+>49925\n+CAATTCGCAGTATTCTGCAAAGTGGGA\n+>49926\n+TTTGTACATTGTACAGGAGCGTTGCGTT\n+>49927\n+CATCACAGTCTGAGTTCTTG\n+>49928\n+TATGCTTCTCATGTAGGAATTGAGC\n+>49929\n+TGTGATGTGACGTAGTGGAA\n+>49930\
n+TGTGATGTGACGTAGTGGAAC\n+>49931\n+TGGAATGTAAAGAAGTATGGAG\n+>49932\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49933\n+TTGCCTTCGTTGTTCTGTATATTCCT\n+>49934\n+TGTGATGTGACGTAGTGGAA\n+>49935\n+TCCCATATTGTCTAGTGGTTAGGA\n+>49936\n+GATGGTTGGCAACAGAGTCTGA\n+>49937\n+TGTGCGCTTGAGAATTGTGGCTCT\n+>49938\n+TAGTATCTAGGACTACCATGGT\n+>49939\n+TGTGATGTGACGTAGTGGAA\n+>49940\n+TTCGTTTTCCGATTGTATGTTTCGGA\n+>49941\n+AGCCAGAGCGCCAGACACTGAAAT\n+>49942\n+TGTGATGTGATGTAGTGGAAC\n+>49943\n+TGAATGGTTGAAATCGCTCGGCGA\n+>49944\n+TAGCTTGGTTCAACAGCGACTGACT\n+>49945\n+TCAAGGTTATGTGGAGCATCTGTAAC\n+>49946\n+TACGAACTCTGACGAAGATATTTGGGA\n+>49947\n+TTTCCGCCAACCTGCAATGAAAGGGA\n+>49948\n+TGAGATCATTTTGAAAGCTGAT\n+>49949\n+TGTAAAATTCTCGCCTCCTCTGGAC\n+>49950\n+CGGATAGAGCGAATTCTGAGTGGT\n+>49951\n+TGCCTGGACATGTAGATGAATCGAGT\n+>49952\n+TGAAGAAACAGCAGAACATGTG\n+>49953\n+TGGACGGAGAACTGATAAGGGC\n+>49954\n+TATTGGACAGGAAGATTGATGGT\n+>49955\n+AATGGCACTGGAAGAATTCACGG\n+>49956\n+TGTGATGTGACGTAGTGGAA\n+>49957\n+TGTGATGTGACGTAGTGGAAC\n+>49958\n+TCCCATATTGTCTAGTGGTTAGGA\n+>49959\n+TGTGATGTGACGTAGTGGAA\n+>49960\n+TGTGATGTGACGTAGTGGAA\n+>49961\n+ACCCTTGAATGGCATAATGTCTT\n+>49962\n+TAGTTACCCTACTGAAGAACTCCTC\n+>49963\n+TGTGATGTGACGTAGTGGA\n+>49964\n+CTGCGGATCAGAAGATTCCAGGTA\n+>49965\n+TGTGATGTGACGTAGTGGAACA\n+>49966\n+TGATAGTGAACTACAACTAAGGA\n+>49967\n+AAATATCAGCTGGTAATTCTGGG\n+>49968\n+GTTCTTAGTTCGTGGAGT\n+>49969\n+CATCGTCGTTTGCGTATGTGGCCCGC\n+>49970\n+TGTTGTGGGTCAAGGCATTTGGA\n+>49971\n+TACATGTGCTGCTGAATGATGCGGC\n+>49972\n+TGCATTCGGAACAAGTGCCGTTGG\n+>49973\n+TGAACACAGCTGGTGGTATCC\n+>49974\n+TTGTCGCTGAACGCGGGTGTCGTT\n+>49975\n+GCTTGATGATTCGAAAAAA\n+>49976\n+TGCCTGGTTTTTTCCCTGAACAAA\n+>49977\n+TGTGATGTGACGTAGTGGAACAA\n+>49978\n+TGTGATGTGACGTAGTGGAA\n+>49979\n+AATGGCACTGGAAGAATTCACGGGT\n+>49980\n+TGTGATGTGACGTAGTGGAACA\n+>49981\n+GAAATTTTATTGCATTTGATTTTGG\n+>49982\n+TGCTTGGACTACATATGGTTGAGGG\n+>49983\n+TAAAGCTAGATTACCAAAGCAT\n+>49984\n+TATTAGAGAATATGTTGAAGAAGGGA\n+>49985\n+GCTGAATCAACATAAACATCGGG\n+>49986\n+TTGGCAGAACTTAAAAAAA\n+>49987\n
+TGTGATGTGACGTAGTGGAA\n+>49988\n+TTTACCTCTCTGCTGAACTGTTGC\n+>49989\n+TGTGATGTGACGTAGTGGAACT\n+>49990\n+TGAAATCATTTTTGTAGCACTAAGGT\n+>49991\n+TGGAGGTGTTATTAACAGTAAGGA\n+>49992\n+CGCTTCTGAATGGCCGTGTCGTGGGA\n+>49993\n+GAGATGGCCTGTATCAATTTCTGTG\n+>49994\n+TGGACGGAGAACTGATAAGGGC\n+>49995\n+AATGGCACTGGAAGAATTCACGG\n+>49996\n+TGTGATGTGACGTAGTGGAAC\n+>49997\n+TATCACAGCCATTTTGACGAGTT\n+>49998\n+TACTTTTTGAACGATTTTGGGAAAT\n+>49999\n+TGCTTGGACTACATATGGT\n+>50000\n+TTCATTCTAGTTTTCTGATATTAAAAA\n'
b
diff -r 000000000000 -r 0528fced93a9 test-data/sample3.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample3.fa Wed May 27 17:31:35 2015 -0400
b
b'@@ -0,0 +1,50000 @@\n+>25001\n+TGATGACCGAAATTTGGAAAAACGGA\n+>25002\n+CTAGGCAGCGATCATCGGGACCCGAGA\n+>25003\n+TAACATCGTCTGCGTACATAAGTAC\n+>25004\n+TGTGATGTGACGTAGTGGAACC\n+>25005\n+GGCAAATGTGAGATGCAGTGTATGG\n+>25006\n+TTCCAGTGTCGGTATTATGAATGTCA\n+>25007\n+TGCTTGGACTACATATGGTTGAGGGTTGT\n+>25008\n+TCGGACTGTAAGTCGAAATGGCGC\n+>25009\n+ATGTGATGTGACGTAGTGGAAC\n+>25010\n+TGGAATGTAAAGAAGTATGGAG\n+>25011\n+TTTTGTCACACAAATTCTTTTGCGGAT\n+>25012\n+TTTGTAAACTACTGATCGGCCAAAAGTA\n+>25013\n+TGGACGGAGAACTGATAAGGGC\n+>25014\n+TTAGATCAGATTTGTGGAAAATTT\n+>25015\n+TGTTTTTCGGCCTTATAAGCGGG\n+>25016\n+TGTGATGTGACGTAGTGGAAC\n+>25017\n+TGTGATGTGACGTAGTGGA\n+>25018\n+TTCGAACTGGTCGATGGC\n+>25019\n+TATCTGTTGTCCGTTTTGAAAAGAA\n+>25020\n+TATTGAACGAAATAAATTAATGGCA\n+>25021\n+TGGACGGAGAACTGATAAGGGC\n+>25022\n+TGTGATGTGACGTAGTGGAAC\n+>25023\n+TGTTGTTGTTGCTCGTCGTCATT\n+>25024\n+TAGCGCGAATGTGCTAGTAGCCTGG\n+>25025\n+TGGAATGTAAAGAAGTATGGAG\n+>25026\n+AAATAATCGTCCTCTTCTTCTAGCT\n+>25027\n+TTAGTGACTATGACTATGTGGGCA\n+>25028\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>25029\n+CCATTTTTGAACACAGTTTGATGGGA\n+>25030\n+GTGGATAACATGAATTTGAACGT\n+>25031\n+TGGACGGAGAACTGATAAGGGC\n+>25032\n+CCATTTTTGAACACATTTTAATTGTA\n+>25033\n+TGTGATGTGACGTAGTGGAA\n+>25034\n+AATCAATTATTTCTGAAGTGG\n+>25035\n+TGACTAGATCCACACTCAT\n+>25036\n+TGTGATGTGACGTAGTGGAACA\n+>25037\n+TGGAATGTAAAGAAGTATGGA\n+>25038\n+TGGAATGTAAAGAAGTATGGAG\n+>25039\n+TGCTGGTATAGTTAGATCGTGAAGA\n+>25040\n+TTAGTCATGGGCTTCGTTATTCGTGC\n+>25041\n+TATCGCTGTATTAGTCGTCTCTTACGA\n+>25042\n+TGTGATGTGACGTAGTGGAACA\n+>25043\n+TGTGATGTGACGTAGTGGAACA\n+>25044\n+TCTTTGGTATTCTAGCTGTAG\n+>25045\n+TCTGATAGTGGTGGCACAATTGC\n+>25046\n+TGTGATGTGACGTAGTGGAACA\n+>25047\n+TGTTACTACGTCTGCTTGTGGGATA\n+>25048\n+TGTGATGTGACGTAGTGGAAC\n+>25049\n+TGTGATGTGACGTAGTGGAAC\n+>25050\n+TGTGATGTGACGTAGTGGAACA\n+>25051\n+TCTGTCGTACGCGTTATTCGCTGG\n+>25052\n+TGTGATGTGACGTAGTGGAAC\n+>25053\n+TGGAAGACTAGTGATTTTGTTG\n+>25054\n+CAAAATCTGAAAGGCGGTACCCTT\n+>25055\n+TGTGATGTGACGTAGTGGAA\n+>25056\n+TAACAATCACATATAACAAATAGGA
\n+>25057\n+TTGGGAGCGACCTGTAACGTGTG\n+>25058\n+TGGATGAAGCTAAACATTAATGTCTA\n+>25059\n+TGAACACAGCTGGTGGTAT\n+>25060\n+TGTATTGGGACTGATATACATATT\n+>25061\n+TGATGACCGAAAATTGGAAAAACA\n+>25062\n+TGTGATGTGACGTAGTGGAACA\n+>25063\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>25064\n+TTTCGTGCCTACCATCAAACTCTGG\n+>25065\n+TGAGGTTCATACTCGATAACCAATGGGA\n+>25066\n+GGATCTTGCTGGTAATAACGTTGTGT\n+>25067\n+CGGCGCTTCACAGGCGCTGGA\n+>25068\n+TATGAACAGTAGATACTCTTTTGTGTA\n+>25069\n+TGGACGGAGAACTGATAAGGGC\n+>25070\n+GAGTAGAACAGCCGAACTTCCGGA\n+>25071\n+TCTGAAATAGCGCAATGTCGTCGTT\n+>25072\n+TAATTCAACGTCGGCGTGTTCAGGA\n+>25073\n+TAGGAACTTCATACCGTGCTCT\n+>25074\n+TGTGATGTGACGTAGTGGAACA\n+>25075\n+TGAGGTAGTAGGTTGTATAGT\n+>25076\n+TGGACGGAGAACTGATAAGGGC\n+>25077\n+TAATACTGTCAGGTAAAGATGTC\n+>25078\n+TAGTTTAGTTTTTCTATTAGCGGGA\n+>25079\n+TCTTTGGTATTCTAGCTGTAGA\n+>25080\n+GGAATATGGAATGTTATACCTCGCGGA\n+>25081\n+TAAAATGGTCATATAGATGTAAGAA\n+>25082\n+TGTGATGTGACGTAGTGGAA\n+>25083\n+TAACCAACCTATCACACTAAAGG\n+>25084\n+TCTTAACTGGCTTTCGTCGAATTCTGC\n+>25085\n+TGGAATGTAAAGAAGTATGGA\n+>25086\n+TGAATACAGAAACAATTTAGGGGA\n+>25087\n+TGATAGTTGGCAAAAGGTAGCCCCCGA\n+>25088\n+ACTACTGTGGAGACATTTTTTT\n+>25089\n+ATCTGGTTCGTTAATTTGAAAGCTGC\n+>25090\n+TTTGTGTCCCCAGATTGTTGACTCTT\n+>25091\n+TAAGGGAAGTCGGCAAATTAGATCC\n+>25092\n+CTCGATGTAGACGGGCGGTGG\n+>25093\n+TTATATACTCAGTACCAGAGACGTGGA\n+>25094\n+TGTGATGTGACGTAGTGGAACAA\n+>25095\n+TCACAATGAATTAGGTCACTTAGGA\n+>25096\n+TTCGTAGTTTCTGAAACGCTTGTTGC\n+>25097\n+TGATGACCGAAATTTGGATAAACGGA\n+>25098\n+TGTGATGTGACGTAGTGGAA\n+>25099\n+TCCCATATTGTCTAGTGGTCAGGA\n+>25100\n+CGGCGCTTCACAGGCGCTGGA\n+>25101\n+TCACTGGGCTTTGTTTATCTCA\n+>25102\n+TGAACACAGCTGGTGGTATCCA\n+>25103\n+TTCCGTCTGTGGGTCATCTGGGTGC\n+>25104\n+CGGGAAACTATGGATCAAATGA\n+>25105\n+TGCTTGTCGGAGCAAAAGGGGGAGGC\n+>25106\n+CACTTTAGGATTAGTTACCACACGAA\n+>25107\n+CGATTGTATGTTTCGGACATTGTGGGC\n+>25108\n+TATACTGTAGTTGTTCTTGCTG\n+>25109\n+TGGAATGTAAAGAAGTATGGAG\n+>25110\n+ATGTGATGTGACGTAGTGGAA\n+>25111\n+TGGAATGTAAAGAAGTATGGAG\n+>25112\n+AGAAAAAGATATCACGT
G\n+>25113\n+AACCGGATGGACGCCACGCAACCCCT\n+>25114\n+GCAAAACGTCGTCGATTGAGG\n+>25115\n+TGGAATGTAAAGAAGTATGGAG\n+>25116\n+TGAACACAGCTGGTGGTATCC\n+>25117\n+TCTCTACTGATCTCTGGGTCTCAG\n+>25118\n+TTACATAAGATATGAACGGAGCCCA\n+>25119\n+TGGAATGTAAAGAAG'..b'TGAGCTCTCCAAAGAGG\n+>49881\n+TGGGATAAACCTCAGCGAACAAGA\n+>49882\n+TGACTGCTCGTCGAACGGGATCGT\n+>49883\n+TGTGATGTGACGTAGTGGAACA\n+>49884\n+TGGAATGTAAAGAAGTATGGAG\n+>49885\n+TAACATTTTCAAAAAGTGTTGGGA\n+>49886\n+TGTGATGTGACGTAGTGGAAC\n+>49887\n+TGGATAGGGGACCTTGAACGGCT\n+>49888\n+TGGACGGAGAACTGATAAGGGC\n+>49889\n+TGTGATGTGACGTAGTGGAAC\n+>49890\n+TGATCGCTGAATTAGGACC\n+>49891\n+TCTTTGGTATTCTAGCTGTAGA\n+>49892\n+TTTCAAGGATTAATGTAGGGGG\n+>49893\n+TGTGATGTGACGTAGTGGAACA\n+>49894\n+TTATTTTCGGAATTTAATGTTGAGA\n+>49895\n+TTTAGGGCCATGTGTCGGATA\n+>49896\n+CTGCTGTCGAGGCCAAAGATCTTTTGGG\n+>49897\n+TGGACGGAGAACTGATAAGGG\n+>49898\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49899\n+TATCACAGCCATTTTGACGAGTT\n+>49900\n+ACTGGATATAGTAGGAAAACTGT\n+>49901\n+TTCTTGGACCGTCGTAAGACTAAC\n+>49902\n+TCGCAGTCGACGAGCTGGTAAGACT\n+>49903\n+TCACTGGGCTTTGTTTATCTCA\n+>49904\n+TTTTATCGTCGTTCTTCAGCATT\n+>49905\n+AGAAGACTCACATGGCGCAGGCAC\n+>49906\n+TGGAATGTAAAGAAGTATGGAG\n+>49907\n+TTGGAACTTCTTGTATAAGTGCATT\n+>49908\n+TACAGTTTATATCTTGTCTAGGCCA\n+>49909\n+TGTGATGTGACGTAGTGGAACA\n+>49910\n+TGGACGGAGAACTGATAAGGG\n+>49911\n+TGCTTGGACTACATATGGTTGAGGG\n+>49912\n+TACAGTTATTCTTTTTCGTTTTGTAGA\n+>49913\n+TTAGATAGAATGAATGAGAAAACTGT\n+>49914\n+TGTCTGTCGTACGCGTTATTCGCTGG\n+>49915\n+TGAACACAGCTGGTGGTATCC\n+>49916\n+TAATTTAGTCATTCTCGTTACTGA\n+>49917\n+TGGACGGAGAACTGATAAGGGC\n+>49918\n+TGCTTGGACTACATATGGTTGAGGG\n+>49919\n+TGGAATGTAAAGAAGTATGGAG\n+>49920\n+TGTGATGTGACGTAGTGGAAC\n+>49921\n+GAAGTTGGGCTGAATGTGGCACGGA\n+>49922\n+TGTGATGTGACGTAGTGGAAC\n+>49923\n+TGGAATGTAAAGAAGTATGGAG\n+>49924\n+ACTACTGTGGAGACATTTTT\n+>49925\n+TTTGTCGTATGGATCCTGCTTGGCATC\n+>49926\n+TCTATTGATACAGTTTTGGTTTAGGA\n+>49927\n+TGATTTACTAGGACCACGGTGTACA\n+>49928\n+TAGGAACTTCATACCGTGCTCT\n+>49929\n+TGTGATGTGACGTAGTGGAAC\n+>49930\n
+CCGTCTAGCAATAAATCTTCTGAG\n+>49931\n+TGGACGGAGAACTGATAAGGGC\n+>49932\n+TAAAATATTATAGACGGTTTTACGGGA\n+>49933\n+TCAAACCGAAAACGAATATGAACCC\n+>49934\n+TCCATGCGTCTGCTGATGTTGTGC\n+>49935\n+CAAGACTTGGGCATTCCTATTGATTG\n+>49936\n+TATAGAATTCATATTTTCCGTTGTT\n+>49937\n+TGTGATGTGACGTAGTGGAAC\n+>49938\n+TCTCGAATAGTGTTGTGACTGA\n+>49939\n+TGTGATGTGACGTAGTGGAA\n+>49940\n+TTTCAAGCCTGTGGAAAAATTGACC\n+>49941\n+TAAGACTAGACTGTATTTCCAAAAGAC\n+>49942\n+TGTGATGTGACGTAGTGGAA\n+>49943\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49944\n+TACGGTTATTATGTAATATTTTAAAA\n+>49945\n+TGCATACTTCCGTTCTCTTTTCGTG\n+>49946\n+TTCGAGTATATTCAAAATGACCATT\n+>49947\n+TGTGATGTGACGTAGTGGAA\n+>49948\n+TGGAATGTAAAGAAGTATGGAG\n+>49949\n+AATGGCACTGGAAGAATTC\n+>49950\n+GTTTTACTAAGGCTTCCA\n+>49951\n+TCTTTGGTATTCTAGCTGTAG\n+>49952\n+TAGGATGTCTGATATAGGCCT\n+>49953\n+CAATTTGTGCTGCACATGACT\n+>49954\n+TGGACGGAGAACTGATAAGGG\n+>49955\n+TCTTTGGTATTCTAGCTGTAGA\n+>49956\n+TACTCAGCTATTGTGAGTGGGCGCGG\n+>49957\n+ACTTAAGCGCGTTGGGCGCCAATTA\n+>49958\n+TGTGATGTGACGTAGTGGAAC\n+>49959\n+TGGACGGAGAACTGATAAGGGC\n+>49960\n+TGAGGTAATCATAGAGCACCACGGT\n+>49961\n+AATGGCACTGGAAGAATTCACGGG\n+>49962\n+TGTGATGTGACGTAGTGGAACA\n+>49963\n+TTCTAACTCGAAACTGTGGTATT\n+>49964\n+TTTCAGGCAGCAGGACTATTGGGGC\n+>49965\n+TACCTTAGCATCACGTCGTCGGA\n+>49966\n+TTACGTAGCCAATTGTGGA\n+>49967\n+TGTGATGTGACGTAGTGGAAC\n+>49968\n+TGTGATGTGACGTAGTGGAACAA\n+>49969\n+TGACTAAGGTCGCAAAATTATAGG\n+>49970\n+TGGAGTGTGTTTCGGATCTACCCAGA\n+>49971\n+TGTGATGTGACGTAGTGGAACA\n+>49972\n+TGTCAATGTGGTAGCTGTTCTTAAG\n+>49973\n+TGTGATGTGACGTAGTGGA\n+>49974\n+TCACAATGAATTAATTCACTTAGGGA\n+>49975\n+TAGTACAATAGATCCGACTGGGT\n+>49976\n+TTTTAAGGGATGTGGCATCGATGCGA\n+>49977\n+TGGACGGAGAACTGATAAGGGC\n+>49978\n+AATGGCACTGGAAGAATTCACGGG\n+>49979\n+TGTGATGTGACGTAGTGGAA\n+>49980\n+TGGACGGAGAACTGATAAGGG\n+>49981\n+TGGACGGAGAACTGATAAGGGC\n+>49982\n+TGTTGCATGTGTGGTAGGACTTATG\n+>49983\n+GTGTGATTTGTAGCAAAGTGATA\n+>49984\n+TAGGTTTCTGTGAGCTTTCCACAAAG\n+>49985\n+TCTTTGGTATTCTAGCTGTAGA\n+>49986\n+TTTCTCTGTGAATTAATTGGCGTGC\n+>4998
7\n+TATTGTACTGTGAAACTGATGGTT\n+>49988\n+TCCAATCTTATACACCCTGTATACGG\n+>49989\n+TGTGATGTGACGTAGTGGAAC\n+>49990\n+TCATTTTTGAACACAGTTTGATTGGA\n+>49991\n+TGTGATGTGACGTAGTGGAA\n+>49992\n+TCAAAATAGACGATTGGTCATAGACT\n+>49993\n+GGGAGCGAGACGGGGACTCACT\n+>49994\n+TGTCTTCTAATTGTCTGATGAGA\n+>49995\n+TGGAATGTAAAGAAGTATGGAG\n+>49996\n+TGACTAGATCCACACTCATTA\n+>49997\n+TTAGTGCCCTGTTACTGGACC\n+>49998\n+AACTCCGAGGGAACCGCAACTGGGC\n+>49999\n+TGTGATGTGACGTAGTGGA\n+>50000\n+CCTGTCTCATCGAGTAGCACTCCTGA\n'
b
diff -r 000000000000 -r 0528fced93a9 tool-data/bowtie_indices.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/bowtie_indices.loc.sample Wed May 27 17:31:35 2015 -0400
b
@@ -0,0 +1,37 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Bowtie indexed sequences data files. You will
+#need to create these data files and then create a bowtie_indices.loc
+#file similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The bowtie_indices.loc
+#file has this format (longer white space characters are TAB characters):
+#
+#<unique_build_id>   <dbkey>   <display_name>   <file_base_path>
+#
+#So, for example, if you had the hg18 index stored in
+#/depot/data2/galaxy/bowtie/hg18/,
+#then the bowtie_indices.loc entry would look like this:
+#
+#hg18 hg18 hg18 /depot/data2/galaxy/bowtie/hg18/hg18
+#
+#and your /depot/data2/galaxy/bowtie/hg18/ directory
+#would contain hg18.*.ebwt files:
+#
+#-rw-r--r--  1 james    universe 830134 2005-09-13 10:12 hg18.1.ebwt
+#-rw-r--r--  1 james    universe 527388 2005-09-13 10:12 hg18.2.ebwt
+#-rw-r--r--  1 james    universe 269808 2005-09-13 10:12 hg18.3.ebwt
+#...etc...
+#
+#Your bowtie_indices.loc file should include an entry per line for each
+#index set you have stored. The "file" in the path does not actually
+#exist, but it is the prefix for the actual index files. For example:
+#
+#hg18canon  hg18 hg18 Canonical /depot/data2/galaxy/bowtie/hg18/hg18canon
+#hg18full  hg18 hg18 Full  /depot/data2/galaxy/bowtie/hg18/hg18full
+#/orig/path/hg19 hg19 hg19  /depot/data2/galaxy/bowtie/hg19/hg19
+#...etc...
+#
+#Note that for backwards compatibility with workflows, the unique ID of
+#an entry must be the path that was in the original loc file, because that
+#is the value stored in the workflow for that parameter. That is why the
+#hg19 entry above looks odd. Entries for new genomes can use a simpler unique ID.
+#
b
diff -r 000000000000 -r 0528fced93a9 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Wed May 27 17:31:35 2015 -0400
b
@@ -0,0 +1,8 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
+<tables>
+    <!-- Locations of indexes in the Bowtie mapper format -->
+    <table name="bowtie_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/bowtie_indices.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r 0528fced93a9 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Wed May 27 17:31:35 2015 -0400
b
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+  <package name="bowtie" version="0.12.7">
+    <repository changeset_revision="9f9f38617a98" name="package_bowtie_0_12_7" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
+  </package>
+</tool_dependency>