Repository 'sr_bowtie_dataset_annotation'
hg clone https://toolshed.g2.bx.psu.edu/repos/artbio/sr_bowtie_dataset_annotation

Changeset 0:e7e7785e41d0 (2017-09-11)
Next changeset 1:faf1b3b933f5 (2019-02-10)
Commit message:
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sr_bowtie_dataset_annotation commit 80b49bd722e8ea8d7dba6dcfe538537cd710d2a2
added:
sr_bowtie_dataset_annotation.xml
test-data/2L-tail.fa
test-data/Ensembl_transposon_set.fa
test-data/dme_miR21_hairpin.fa
test-data/sample.fastq
test-data/sample1.fa
test-data/sample1_output.tab
test-data/sample_output.tab
tool-data/bowtie_indices.loc.sample
tool_data_table_conf.xml.sample
b
diff -r 000000000000 -r e7e7785e41d0 sr_bowtie_dataset_annotation.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sr_bowtie_dataset_annotation.xml Mon Sep 11 18:27:40 2017 -0400
[
@@ -0,0 +1,161 @@
+<tool id="sr_bowtie_dataset_annotation" name="Annotate smRNA dataset" version="2.0.0">
+  <description>by iterative alignments with sRbowtie</description>
+  <requirements>
+        <requirement type="package" version="1.1.2">bowtie</requirement>
+  </requirements>
+  <command  detect_errors="exit_code"><![CDATA[
+        ## Cascade annotation: align the reads to a first reference, re-align the
+        ## matched reads to the first additional reference, then pass the reads
+        ## left unmatched by each additional step on to the next one. One
+        ## "<reference> Matched<TAB><count>" row is written to the output per step.
+
+        ## First reference: a history fasta is indexed on the fly, a built-in
+        ## index is used through its data-table path. The label is resolved here
+        ## because ownFile only exists in the "history" branch.
+        #if $refGenomeSource1.genomeSource == "history":
+            bowtie-build -f '$refGenomeSource1.ownFile' genome 1>/dev/null &&
+            ln -s -f '$refGenomeSource1.ownFile' genome.fa &&
+            #set index_path = 'genome'
+            #set first_reference_label = $refGenomeSource1.ownFile.name
+        #else:
+            #set index_path = $refGenomeSource1.index.fields.path
+            #set first_reference_label = $refGenomeSource1.index.fields.name
+        #end if
+        ## bowtie writes the --al/--un reads as they appeared in the input, so a
+        ## read spans 2 lines for fasta and 4 lines for fastq (and its subtypes).
+        #if $input.is_of_type('fasta'):
+            #set format = "-f"
+            #set lines_per_read = 2
+        #else:
+            #set format = "-q"
+            #set lines_per_read = 4
+        #end if
+        #set method_prefix = "-v %s -k 1 --best" % str($mismatches)
+        ## Make sure the read files exist even if bowtie has nothing to write to them.
+        touch matched.fa unmatched.fa &&
+        bowtie -p \${GALAXY_SLOTS:-4}
+               $method_prefix
+               --al matched.fa
+               --un unmatched.fa
+               --suppress 6,7,8
+               '$index_path' $format '$input' > tabular_bowtie_output.tab &&
+        genome_aligned=\$(( \$(wc -l < matched.fa)/$lines_per_read )) &&
+        echo -e "$first_reference_label Matched\t\${genome_aligned}\n" > '$output' &&
+        #set counter = 0
+        #for $i in $AdditionalQueries:
+            ## -f: genome.fa is absent when the first reference is a built-in index.
+            rm -f genome.fa &&
+            #set $counter += 1
+            ## Step 1 takes the reads matched to the first reference; later steps
+            ## take the reads left unmatched by the previous step.
+            #if $counter != 1:
+                #set step_input = "class_unmatched.fa"
+            #else:
+                #set step_input = "matched.fa"
+            #end if
+            ## Start from empty files so a previous iteration cannot leak into the counts.
+            rm -f temp_class_matched.fa temp_class_unmatched.fa &&
+            touch temp_class_matched.fa temp_class_unmatched.fa &&
+            bowtie-build -f '$i.ownFile' genome  1>/dev/null &&
+            ln -s -f '$i.ownFile' genome.fa &&
+            #set index_path = 'genome'
+            bowtie -p \${GALAXY_SLOTS:-4}
+                $method_prefix
+                --al temp_class_matched.fa
+                --un temp_class_unmatched.fa
+                --suppress 6,7,8
+                '$index_path' $format '$step_input' > tabular_bowtie_output.tab &&
+            class_aligned=\$(( \$(wc -l < temp_class_matched.fa)/$lines_per_read )) &&
+            mv temp_class_unmatched.fa class_unmatched.fa &&
+            echo -e "$i.ownFile.name Matched\t\${class_aligned}\n" >> '$output' &&
+        #end for
+        ## class_unmatched.fa only exists once at least one additional step has run.
+        #if $counter > 0:
+            remaining=\$(( \$(wc -l < class_unmatched.fa)/$lines_per_read )) &&
+            echo -e "Unmatched to previous indexes\t\${remaining}\n" >> '$output'
+        #else:
+            true
+        #end if
+        ]]></command>
+  <inputs>
+    <!-- Reads to annotate and the mismatch budget passed to bowtie (-v) -->
+    <param name="input" type="data" format="fasta,fastq" label="Input file: reads clipped from their adapter" help="Only with clipped, raw fasta or fastq files"/>
+    <param name="mismatches" type="select" label="Number of mismatches allowed" help="specify the number of mismatches allowed during alignments">
+      <option value="0">0</option>
+      <option value="1" selected="true">1</option>
+      <option value="2">2</option>
+      <option value="3">3</option>
+    </param>
+    <!-- First reference: a built-in bowtie index or a fasta file from the history -->
+    <conditional name="refGenomeSource1">
+      <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Bowtie Built-ins were indexed using default options">
+        <option value="indexed">Use a built-in index</option>
+        <option value="history">Use one from the history</option>
+      </param>
+      <when value="indexed">
+        <param name="index" type="select" label="Select a DNA reference index" help="if your genome of interest is not listed - contact instance administrator">
+          <options from_data_table="bowtie_indexes"/>
+        </param>
+      </when>
+      <when value="history">
+        <param name="ownFile" type="data" format="fasta" label="Select a fasta file, to serve as index reference" />
+      </when>
+    </conditional>
+    <!-- Further references, each a fasta file from the history. At least one is
+         required (min="1"): the final "Unmatched" count is read from the file
+         produced by the last additional alignment step. -->
+    <repeat name="AdditionalQueries" title="Additional Alignment Step" min="1">
+      <param name="ownFile" type="data" format="fasta" label="Select a fasta file, to serve as index reference" />
+    </repeat>
+  </inputs>
+   <outputs>
+     <!-- Tabular report; the metadata action below names its columns -->
+     <data name="output" format="tabular" label="Cascade Annotation Analysis">
+       <actions>
+         <action type="metadata" name="column_names" default="Reference Index,Number of reads"/>
+       </actions>
+     </data>
+   </outputs>
+    <tests>
+        <!-- Parameters nested in the conditional are addressed by their full
+             path so they cannot be confused with the repeat's "ownFile". -->
+        <!-- fasta reads, history genome, two additional references -->
+        <test>
+            <param name="input" value="sample1.fa" ftype="fasta" />
+            <param name="refGenomeSource1|genomeSource" value="history" />
+            <param name="refGenomeSource1|ownFile" value="2L-tail.fa" ftype="fasta" />
+            <param name="AdditionalQueries_0|ownFile" value="dme_miR21_hairpin.fa" ftype="fasta" />
+            <param name="AdditionalQueries_1|ownFile" value="Ensembl_transposon_set.fa" ftype="fasta" />
+            <output name="output" ftype="tabular" file="sample1_output.tab" />
+        </test>
+        <!-- fastq reads, same references -->
+        <test>
+            <param name="input" value="sample.fastq" ftype="fastq" />
+            <param name="refGenomeSource1|genomeSource" value="history" />
+            <param name="refGenomeSource1|ownFile" value="2L-tail.fa" ftype="fasta" />
+            <param name="AdditionalQueries_0|ownFile" value="dme_miR21_hairpin.fa" ftype="fasta" />
+            <param name="AdditionalQueries_1|ownFile" value="Ensembl_transposon_set.fa" ftype="fasta" />
+            <output name="output" ftype="tabular" file="sample_output.tab" />
+        </test>
+    </tests>
+  <help>
+
+**Introduction**
+
+Bowtie_ is a short read aligner designed to be ultrafast and memory-efficient.
+A generic "Map with Bowtie for Illumina" Galaxy tool is available in the main Galaxy distribution.
+
+Here, the sRbowtie wrapper specifically works with short-read FASTA or FASTQ inputs
+(-v bowtie mode, with -k 1), which have to be clipped from their adapter before alignment.
+
+.. _Bowtie: http://bowtie-bio.sourceforge.net/index.shtml
+
+
+------
+
+**What it does**
+
+.. class:: infomark
+
+This script uses the sRbowtie wrapper to iteratively match reads to a series of reference indexes.
+Reads that aligned to the first reference are realigned to the second reference.
+From this point on, unaligned reads are taken as input for alignment to the third reference, etc.
+
+
+Reads are matched on DNA references (both strands) as fast as possible, without taking care of mapping issues
+
+*-v [0,1,2,3] -k 1 --best -p N --suppress 6,7,8* (N is the value of GALAXY_SLOTS, 4 if it is not set)
+
+Beyond the second reference, reads unaligned at step N are used as input for sRbowtie at step N+1
+
+-----
+
+**Input formats**
+
+.. class:: warningmark
+
+*Reads must be clipped from their adapter and provided in a FASTA or FASTQ format*
+
+-----
+
+**OUTPUTS**
+
+**Annotation table in a tabular format**
+
+  </help>
+</tool>
b
diff -r 000000000000 -r e7e7785e41d0 test-data/2L-tail.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/2L-tail.fa Mon Sep 11 18:27:40 2017 -0400
b
b'@@ -0,0 +1,3001 @@\n+>2L-tail\n+CCCATTCATACAAAATCCTATAGGTACCCCTACATCCACAAAGAGGAAGTTAAAAAGCAAATAGAATCAATGCTAAACCA\n+AGATATCATCAAATCTAGCTATTCTCCATGGAGCGCCCCTGTTTGGGTAGTCCCTGAAAAATTCACTCCTACAGGAGAAC\n+AAAAATGGCGTCTTGTTATCGATTATAGAAAACTCAATGGAAAAACTATATCTGACAAGTATCACATCCTAATATTGCAG\n+ACATTTTAGACAGATTGGGCAAAGCTAGATTGGACTTGATTGGTGGACTAGACACACGAGGACGTGTGAAAGGTCAGGAA\n+GGCCGTGTCGGAGCGCAACTCTTCGCATTCTAGGATTCTTTCTGTGAATTCACATGCTGATGAAATTGTAATATATGCGC\n+CATACGATTTTTTTGCATACGCTTTTTTCGCCGTGGCTTTAGAGGTGGCTCCAGGATCTCTCGAATTTTTGTTCGAGAGC\n+GAGAGAGCGGAGAGCGCTACAGCGAACAGCTCTTTTCTACACATACAATGATAGCAGACAACTGTATGTGTGCACACGTA\n+TGCTCATGCATTGTAAATTTGACAAAATATGCCCTTCAATTTAGAAGTTCTTGGACTTTAAACCTATATTATTTTTGATC\n+AATTGCCACCATGCAAAAATTCTTCTTTTGCATTGCCTTAAAGTTATTATTATTTAAATAAAGCTTAGAAATAGTAATAG\n+CCAAATCATAATATTACAAAATAAATTTCAAAAATGACTATATTAGAATAATTCTCATTAGAGTATTTAGCTTGCGACGT\n+ATGAAAAATTAATAAGGCAATGATTGTTGAGTGCTTGTGTCCGCACATCTCGCCTCAAGATATGACTTTTGCTATTTTCA\n+TTTATTTATTTATTTATTTGCTACTACGTATAATTGTTATGAAATATTTATATAGTTTTTTAATTGAGCCGAACAAATGA\n+TCCGTATTTAAACTAAGATCAACGCTGAAAGTTAAATAGTTTTTATTTTTCAAGTCGAACAAATGATCCGTATTTAAACT\n+AGGATCAACTCCGAAATAAATTAAACACTGAATTCGGTTTCGAAATTCGGTTTTGGTTTTTATTAAATATCGGTTTAAGA\n+CTTATTGAACTTTACGGTATAACTTTCAACTTAAGACTGATTTCAAATGTGAGTGAGATTGAGAAGGGGCCTCTATGGGT\n+TACAGAAATAAAGAATAGGAAAAAAACGGAATATCTCCCAAGGTTGTGAAGTGAAGTAATAACGATCGACACGCAGATAA\n+CGCAGAAAATAGCTTTAAACGGTAAAATCATGTGATGTTCTTGGATGTTTACGTTGAGGAGAGCCTCAGAGAGAGCAGAC\n+GAAACTGCTAGGTCAGGCTTGGGAATCCGTTTACAAGAGCGGCCCAGTTTGGGTGGCGCGAATATTCAATTAACTGAGCG\n+TGATGAGTTGGCGTGAATACGTCACTATATATATATATTTTTTTTTCGCACTGGTCCCATAAAACAGAGAACAGAAACGA\n+GGAAATGAATATAATGTAAAATAAATTTTATTAAAAAGTATTAAATATTAGTGTTAATAGGATCTAATTATGTTATCAAA\n+AAGTTAATATTGATTGCTATTGAAAATACAAAAGTTACACTTATGCAGAGATGCGTGTATGTGGAGAGTGATGAGATTGG\n+GAGGATCATGTAAGAGAGAACGCGCTCTATTATTGTTATCTTGGTACTCTGTCATCGTTGCCTGTTACATGCATGTTAGA\n+GAGTAATCGTCCCATTCGCTTGCAGACAACGAGAGAGTGTAGTTCTATCGGTTGCTTAAATAAAATTATTAGTGAATAGT\n+TAAAACAATGAAAGTGTTTCACTATTTGGTTGATGGGCTGGCTTATAATATACATAC
TTAAACGTATTATATTATATTAA\n+ATTCAATAATTGCTTTAAGAGCGTCCGAGAGTCAAGACTACAGATAATAGGATAAAGTCTATTATAGTCAGAACATAATA\n+CTCTGTAAGGGTCATGCAACTCATAGTTAGATCTACAATGATTTAAGATAAGGGGTATATAGTTTCCAGTAAATGTACTT\n+GGAACCGAGAAGTTAAGCCGACTAATCAAGTCGGAAACCGAGGAGTTAAGCCGACTAAACAAGTAGGGACTCTCAACATC\n+CCCACGAATAAGCTTACAAATAAAGACTGTTCCAAGCATAGTTCTACGGTTAGCTAGGGACGGTAAATTAATTAAAAGTA\n+ATCTACTGGAATAAGGAGGTAATATATGGTTTGCATCCCAATTTAGGCGCCGCAAGTCAAAAAGTAAGAAGTTTTTTTGG\n+ACCGATTCAATGCGGTCCGAGTTGACTACGTATTGTGGACTCCAAACAGGTGATCCGTACTCGAGAATTGGACGGACAAA\n+CGAAATAAATAAGGTTTTAGTCAAGTAAGGATCATTAAATTCCTTAGACCACCTTTTTATAAAACCAAGCACACCCCTGG\n+CCTTATTGACAATAGTCGAAATATGGTCAGAAAAGTTTAGTTTAGGGTCCAGAAGAACACCAAGATTATCAACCCGTGTT\n+ATTCTGTCCAAAGGGCACCCACTTAGAGTGTAAGTCGTGCGTATTGGGTTGACACGACAAAAGGTCATAACTTTATACTT\n+GGAGCCTTTTAAGTCTAATACGTTATCACGGCACCATATTTGAAATCTGTTTAGATCGGATTGTAAGTCCAAATGGCACG\n+AAATGTCCTTGTGCTGGAGGAATAATTTAACATCGTCTGCGTACATAAGTACACGCGAATGTTTTATTACTAAGGGGAGG\n+TAGTTAATGAATAAGGTAAAAAGAAGCGGGCCAAGATGGCTCCCTTGTGGGACACCGGTTGTGACTCGGAGAATACAAGA\n+TAAAGAATTTATAAAGAGGACTTGTTGCGTCCTGGCATTCAGATAGCTTGAAAATTGGATTTCAGAAGATCAACAATCTA\n+AAAGATCAAGTTTTCTTACTAGAAGGTAATGATTAACAGAGTCAAATGCTTTACTAAAGTCAGTGTAGATGACATCTGTT\n+TGAAGATTTTTTTTGAAACCCTGTATTACGAATCAAGTTAGCTCTAAGAGGTTAGTGGTTGTTGATCTGCGTTTCATAAA\n+ACCGCGCTGACACGGTAATTTGCCGATATTTTTTTTGATAGATCTAACAAAATTAACTATCAGGGCTTAGTGGATTTCAG\n+GAAACAAGCGCTGGAAAAAGTTAGACGGCTCATCCAACGAAAACAGACCTTGTCGAACTTACATTCAAATAAAACTAGAC\n+ATGATAAGGGGGACAGGATGTATGTGGCCAATAAGCAAATATAAACAAAAGAAAAGTTACGATATAGGGTCAAGGAGGTG\n+GCCAGAAAGGATAAGCAACGTGGAATGTGTTCAATAAAGCTGCCTGGTGGGTTGTGTGACGATCCTTACCTTAGTGGGCG\n+TGTTATAGGGCTGCATTATAGGCTGCGTATATAGGGCTGAAGTTAACGAAGTGAATTCGGCTCGTTTGCAAATGATCGTG\n+CATTTAAACGCGCTCGCTGCCCACTTTAATTCATTCAATTCGAACGGCACATTTTGAAACAAATTACGTGAAGTCCGAGT\n+AAAGTGAATAGGCGATTCAAATGTCTTGTCTGCGTATTAGTTTCAATAAAAGACAGTTATGGTATGCTGAGTTCGTGTTT\n+TTATATTATTATTTATTATTACTTAAATGATTATACATTATATTCTTATTTTATAGATATACTCATATTTAATTTTCAAT\n+ACTATTATGAAGAAACATTATTTAATTAATATTCTCTGAATTTAGAGAGATGGCAGAACGTTTGT
CTGTTATTAAATTTC\n+GAGCGTCGGAAAATGCTCTCTCAGAGGCAGCGC'..b'AGGTCCCGGAAAGTGGGCCATCGGAG\n+GTAGTCGCCATCGAAGACTTCCGTGTCGCATGGAGGTAAGCGGCAGCCGGACGAAATTAGCGGCTGAGAGGGTGCTTGCG\n+CGACTTGAGGCGTTGCTCTGTCGATTGTTTCGCCAATTTGTGCTGCACATGACTCGTATACTGAGTAGCAGTAGTCATAT\n+TTGGCCTGGAGAATAGGCACTGTGTCGAGGGATCCTTCTTGGGCCATTAGGTCAGAGCATGTTTCGTACTCTCTTTCCAC\n+TTTGTCCCATAAGGCTCGCACCTGTTGCAGACGGACTTGTAACGTGTGTAGGGACGGAGAGGCTTGATCAGGAGTGTTGA\n+TCTTCGCTTCGAAGAGGCTTACGCGATCGCTGACGGCGATGAATTTATGCAACGCTGCGGTTGCGGGCGTTGGCTGCTCA\n+GAGGATGCCATTTTCTTTGTGGTAGAGCGTGTGACGCGGAGGAAATCAGTCCCGACAGCGGAGGGACTCTCGGATTTTCT\n+CGATAGAGAAGACTCACTAGACGCTCGCGTCACTGGTCGGGAAATGACCAACCTAGGAGTTGTCTTCGAACTGGTCGATG\n+GCAAAAACTTTGCTTTCGGAGTGGGAGTCGCGGTTTTGACCCTGGGACTAACAGACTTGGGTGCTGGCTTACCGCGAGTG\n+GATAGCGGAGATTGCGTGTTGAACTTTTGTTCTTTTCCAGGTGCGGGTGTCTTTTTCTTGTCTCCCTCTAGGGGCATGCT\n+CAGCTATGCCCTAGGAGAAGGAAGTCAAGAAGGCCTGCACGCCGCAAAAAATGTGGAATTGAAAAAGAACCAGACACAGA\n+GGGCACCCAAATCTGGATGGACAAAGGATCCCGTCGGAATTCGGGAGAAAGTTGCAATTGGGATCTGGAGATTGGAGCAC\n+GAAAGGAAAAAAATCCCAATTTTTGTGAAATAAGGCCCCAATAGATCTTCAATCAACTGGAAAAAAGCTTAAATGAAGCA\n+CAACATAGCTGAAATTCAGTGTGAAAAAATAGCTAGGAGGATTCACAAAAAAAGCTAGAGTGAAGTCCACAAGAAGATAT\n+AGCTATATTGGAGTCTAGAAAAACAATAGCTCAAATTTATATAAAAAAAAACTATGAATTGCAATTTCTGGAAAAAATAG\n+CTAAAAAAAAAATAGCTACCAGCTGGAGTATATATTTGAATTTATGGAAAAATAGCTAAATAATAGCTACCAGATGGTGT\n+CTGGGTAATAGCCAATAATTTATAAAAATGGAGAAACATATATATGAAATGTAATCTCGGAAGAAAAAGTCAGGGTTTAT\n+TTTATTCTGTATGTCGAGATTTGTGTTTTATTTTGAGCCAAGAGATAAAATTTGTTCTCAGTTCGGCTTCGATATTATTT\n+ATGATTTTTGGTAATATATTTCATTTGTGTGTTAAATTGGTTAAATCTATGGAGTGATTCTTATATATGTATGTGGATGT\n+AAATTTTATTGGAATATATGTGGATATGTATGGATATGTATATTTATGGGTATGTATAATTTTATGTGTATGTAAAATTT\n+TGCTGAATATATGTATATGTATATTTGATTTAATTTAATTGAATTTGTGGTTTGTATATTTTATTGCGGCAAAAAAACAG\n+CAGTTTAGCAGCGGACGATAATAGTGAATTTTGGACAGTGTTTAAAAAAATGTAAGTATATATATGTATATGCGCTTGGA\n+TACCAGCGGATCACCGTGAGACGAATTAGGGGGCCGGTATTGGCAAAGTGCTTGTTTATGCACTTAAAAAAAAGAAGTAA\n+TCACGGTGGCTGGGCGCATGTACTTATATATTTTCACTTAATTTTCACTTTTCACTTGCTGCCGTTGCGCCCTTCTGGCG\n+TGAACA
GCTGCTGAGTTCTATTCTTAAATCTATGTTTGAGGCCTATGACCGCGCTAATCACCTCCCGCGTGGTCATATCT\n+CTTGACACTTCGGCAATAGACCGCTGCAATCGTACTTATCTGTAGTTGCCACTTATGCTGTCCAGTGACATGTTTATTGC\n+AGCCCATAGTTAGAACATATGTTCTTTTCTCTCTATAGTTTCCCTACTTATCATATCTAAACACATATTTAGACAGCAAC\n+GTTATGAGGAAATCGGTGCTGTATATGATGGATCGCTGTTTGTTACTACAATTGTGCCATTAAAATTAGTACACAATTGA\n+CAAAAAATAATTTGTATCAACAGAGAGCTTATATTAGCAGAAAAAGAAAACTTGGATACCCATATATTTGCTTAAAATAA\n+ACAACATTAATATATTTGTAAAACATGTCTTTTAAATGACACTAATAGACGTAAAAAAATTTTTTTCTTTAGTAAGGTAA\n+ATATTTTTATGGCATATCGGCCAGATCGTGTATATGGCAGCTATATGAAAGTTGACCAAATCACTTGAAACTTTGTGAAC\n+CATCTTGGTGGAAGTAAAGAGTAACACAAACCAAATTTTGTGAAGATAGGTCATCATTTCGAATTTCTGCCAAAAAAAAA\n+AAAAACACATAAAAAACCACTGTGCGATGGTACCCAATTCTAAAAAAGAAAAAGAGATCAGAGCTTTCTTTTGATTAACC\n+GGTTATAATCGTAAATTTATCCCAAATTACGCACACAAATGCCAGTAATTTAGCCCTTGGGTCAGTACTTCCTCAAAATT\n+ATCATCCAATGCAGACGAGTTAAATATAACAGTTTACCATTCCTAGTGACCATCAACCTCTTAGATGGCTGAATAACTTA\n+AAGGAGTCGAATGCTAAGTTGCAAAGATGGAGAGCCAAATTAAACGAATATCAATTTAAAATCAATTATATTAAAGGAAA\n+GCAGAAATCAATTAAAAATCCGATAAAAATAAAGTAAAGTCAATTAAACAATACAGTAGAAATGCCAAACAAATTTTACT\n+TTTACAAGAGCGATGCAGATTTTTAAATTATTCAAAAATTAATAACTCCTGCACTACAACACTACAGAGAAGAGAAGAGA\n+AGAGAGAAGAGAGAATAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAAGAGAGAAGAGA\n+GAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAAAA\n+GAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAG\n+AGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGA\n+AGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGA\n+GAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAG\n+AAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAACAGAGAACAGAGAACAGAGAACAGAGAAC\n+AGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGA\n+GAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAAGAGAGAAGAGAGAAGAGAGAA\n+GAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAGAGAAGAG\n+AGAACAGAGAACAG
AGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGAACAGAGA\n+ACAGAGAACAGAGAACAGAGAACAGAGAAGAG\n+\n'
b
diff -r 000000000000 -r e7e7785e41d0 test-data/Ensembl_transposon_set.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Ensembl_transposon_set.fa Mon Sep 11 18:27:40 2017 -0400
b
b'@@ -0,0 +1,10060 @@\n+>FBgn0026065_Idefix\n+GTGACATATCCATAAGTCCCTAAGACTTAAGCATATGCCTACATACTAATACACTTACAA\n+CACATACACCCCAATACAACATACACTACTCCGGATGTACCCAACAGATACCAGATAAGA\n+ATAAGATTGTTATATGATCCTCGAGAATGGAAAAAACCCCAATTCTAGATAAGTCACCCA\n+CTGGTAGACTAAACATCCGTCCCCTAATTTAAACAATTCCTTGCTTAAGCCTCACCCCAT\n+CGTCACATTCCCACGTTCAAAGCTCGGAGCCGCAATCCCGAAAAACAAAAGTATCGATTT\n+CAATAAACAAATTATAAGAATCTAAGAGCACTTGTATCCAAGAGCAAATGCACTTGAATC\n+CAAGAGAAACGCAAAGCTTTTTCTCTTTACGATCAGAATCCTAAAGTCTAAAGTCCATAT\n+TAGAAAAGCTCGATACCGAGGCTTGAACGTCAACCAAATCAGAATAATTATCAGAGTTCA\n+GTTTGAGACCTAATTGTAAAAGGTTCGGTGTTCTTCTCAAATAAAAAGATTGTAATCATT\n+TAGTGAAATAAAAATTATATTTTTTTCACTTATAAATATTGCAAGTATTTAATTGGCGCA\n+GTCGGTTAGGATCCAATAAAATAAAAGAGTCCTTTTAGTACGGTACTGATCAACTGAAGG\n+ATATGCTATACGACTAGCTATCCAAGATCAGCGAATTAAAATAGTGATTCAAAAATATTT\n+TTTAATCCGCAAAAGAATCTACGTGAAAGTAGTATTCAAAATAAAATCCCGTGCGGTCGG\n+AAACAAAAATTAATTTAAATTTTTTAATTCCGAAACTTAAAACCAAGTTTAAAGAAAACT\n+TAAAATCAAGAAAACTTAAAACCAAGTTTAAAGAAAACTTAAAATCAAGAAAACTTAAAA\n+CCAAGTTTAAAGAAAACTTAAAATCAAGAAAACTTAAAACCAAGTTTAAAGAAAACTCAA\n+AATCAAGAAAACTTAAAGCCAAAATAAGCTAGAAAACTAAAAGACATCATGGCAGTCCCA\n+CAACTCTCAGAAACACACCTAAACCAACTGCTAAACCAAATCAAAGAATTAAACTACTAC\n+GATGGCGCACCTGGCAAATTATCTGGATTCGTCAACCAAGTGGAACAACTGCTCAGTTTA\n+TACCCAACACAGGAAGCAAGACAGGCACACGTCATATATGGAGCAGTGAAGCGGTTATTA\n+GTGGATTCAGCCTTAGAAGTCGTAACCCAGGAAAGAGCTAACACATGGCTGGACATGAAG\n+AAAGCACTGGCAATGGCATTCAAAGACCATAGACCTTATGTAACTCTCATCAGACAATTA\n+GAAGACATATCATACCCAGGAAGTATCTGTAAGTTTATAGAAAAATTAGAAACACAATAC\n+TGGATTATGTTCGATAAGTTAGAATTAGAAAGTGACCATGTTGATAAATCGAATTATACC\n+GAAATGTTAAACAAAACTGTTAAATCAGTAATAGATCGAAAACTGCCGGATAGAATTTAT\n+ATGTCTTTGGCACGTAAAGATATTGATACAATTTATAAATTAAAACAAGCATCAATGGAA\n+TTAGGCCTTTATGATGCTATTCCAGAAAATCACCGTTCTAATAGAACAGAAATGAATAAA\n+CGTAGGAACAGGGGAAACTATAATCAAAATAATAATCAAAAATATTACAATAATAGAAAT\n+CACAACTACAGTAATTATTATCCTAGCATGAATCAGAATCATAATACACAACCACCTCAG\n+AATCCGACTCAACCTATGACAAATCAAAACCAATATTCACCGCGTTTCATACCGAATAAT\n+CAAAGAGGGAATTATTATGCATTTAGACGAGACTTAACACAAGCTCAGCAGAACAACCCA\n+C
TTAATAACACCCTTAACTTCCAACCTTCGACATCGAATAATATTAACAGACAAGGGCCA\n+GTAAAAAGACAACGCGAGAGTCAGAGTGACCAAAGCAGGATGGATGTAAATTTTCATCAA\n+GCTGCCTCGGACACTCAAATGATAGAGAAGGACATACAAGTCCCTATGTAAAAATAATTC\n+ATCATAATAAAAATTATAAGGGAATGATCGATACAGGATCATCAATTAACATCATAAGAG\n+AAAATTTTGAGAACTTAGAAGAAAAGGAAGAAAACCTAATAGTATACACTATTAAAGGAC\n+CAATAACACTAAAGAGAAGTATAATAATAAAACCTACTTCAGTATGTCCGTCTGCTCAAA\n+AATTCTACATTCACAAATTTTCTGATAACTATGATTTCTTGTTAGGTCGAAAGTATTTAG\n+AAGATACAAAAGCTAAAATAGATTATGCTAACGAAACAGTAACACTAGGCTCAAAAGTAT\n+TTAAGTTTCTCTATGAAGAAAAGAAGGGCGAGACCGCATCCAAATGCCTTGACCCACAAG\n+AAAAGAATGATTCCGCTCTAGTGGACAGAACCAAACCAAAAATGCAAAAGGTTAAGACCG\n+CACCTAAGTGCCTTAAACCAAAGCATCAACAGCAGAAGAAAGAGACCGCATTACCCAAAT\n+GCCTCATTTCAAATGTTGTTAAAGACACAGTGGACAATGATGTAACACATCTCGATCCCA\n+TGTCCGTTGACAACGATATAGTCAACTTCGCGATTAACAATGAGTTACGCGAATGTAACG\n+AGTATAGACTCGAACACTTAAATGCAGAGGAAGTTGAATGTTTAAAGAAGTTCCTATACG\n+AATATAGAGACATTCAGTACAAAGAGGGCGAAAATTTGACCTTCACCAGTACTATTAAAC\n+ATGTCATCCAGACTCAACACGAAGACCCAGTATACCGTAAACCCTACAAGTACCCTCAAA\n+GCGTTGACCAAGAAGTTAACAAACAAATTAAAGAAATGATAGAACAAGGGATTGTTCGCA\n+AATCGAAGTCCCCTTATTGTTCTCCTATTTGGGTGGTCCCCAAGAAGGCAGACGCCTCTG\n+GGAAACAAAAATTCAGGTTGGTAGTCGATTACAGGAACCTAAATGAGATAACTGTTAACG\n+ACAAATTTCCCATTCCCCGAATGGATGAGATATTGGACAAACTAGGTAGATGCCAATACT\n+TTACCACTATAGATCTAGCCAAGGGTTTTCACCAAATCCAAATGGATGAAAATTCTATTG\n+CAAAAACAGCTTTTTCAACTAAGCATGGGCATTATGAATATACTCGTATGCCCTTTGGTT\n+TAAAAAACGCTCCAGCTACTTTTCAGAGATGCATGAATAATCTTCTGGAAGATTTAATCT\n+ACAAAGACTGTTTAGTCTATTTAGACGATATTATTGTTTATTCCACTCCATTGGAAGAAC\n+ACATTTTATCCCTAAAGAAAGTCTTTGAAAAACTGAGAGACGCTAATTTAAAGTTGCAAC\n+TAGATAAATGTGAATTCATGAAGAAAGAAACTGAATTCCTAGGACACATCGTCACAACAA\n+ATGGCATCAAACCAAATCCAAATAAAACTAAAGCAATTACAAATTTTCCATTACCCAAGA\n+CACCTAAGCAAATAAAATCATTTTTGGGATTATGTGGATTCTATCGCAAGTTTATTCCTA\n+ACTTTGCCAAAATAGTTAAACCCATGACCCTCAAATTAAAGAAAGGTGCTATAATAGACA\n+CCAAATGTAAAGAATACATCGAATCATTTGAAAAATTAAAAGTTTTGATAACTTCAGACC\n+CGATATTAATCTATCCTGATTTTTCAAAACCTTTTTCTTTGACAACTGATGCTAGCAACG\n+TAGCTATTGGTGCAGTGTTATCACAAAATCACAAGCCAGTTTGTTATG
CCAGTAGAACGC\n+TAAACGAACATGAAATCAACTATGCTACGATTGAAAAAGAATTGTTAGCTA'..b'CCC\n+GTTCCAGAAAATTCTACCAGCAAACGAGCTCGCCTACACCCAAATGTTAAGTGCCACTTT\n+TGTGGAAAAATTGGCCACAAGATAGCTGACTGCCGCTCCATGAAAAACAACTTAAAGAAT\n+CAACAAGGATCTAGTTCGAGTATTGGGCGCTTATCTGACTCTAAACCTGGGTCAATTACT\n+TGCTATAGATGTGGAAACCAGGGGCATATAGCGTCAGCTTGCCCTGCAAGACAATCGTTG\n+TCAAACCAAACTAAAGCCGACGAGAAGCGTGTCAACGTGTGTCACGTAGTCGAGCCAATT\n+GGGACATTGATATCATCTGGTGAGTCGTATCCATTTTATTTCGACTCTGGAGCCGAATGC\n+TCACTTGTAAGAGAATCTGTGTCCACCCAACTCTCGGGCACACGAATTAACAACAATGTA\n+GTTTTAAAGGGTATCGGAAATAATACTGTTACCAGTACATTACAAATTTTGTCAAACGTA\n+ACAATAAGTGGTTACTGTCTCGAAGTGCTTTTTCACGTAATTCTTAATGATTGCATTAAT\n+TATAATATTATAATTGGACGCGAAATTTTAAGTCAGGGATTTAGTGCTACTATAACAATA\n+GATAAAATAGAGTTATGTAAAACAAGGTCTGTGCAAACCCTATCTGCTTAGAGTAGTAGT\n+TTTAGTCTTGAAAATGTTAATACCGAATTGTGTGGCGAGGATAGGAAAATCTTGGTAAAT\n+CTTTTGAATAAATTCTGTGACTCATTTATAGACGGTTTTCCCAAAAATCGTGTTACAACT\n+GGCGAACTAGAAGTACGCTTAATTGATCCAATAAAAACTGTACACAGACGACCGTACCGA\n+CTTAGTATAGAGGAAAAACAAATTGTCCGAAACAAGGTTAATGAGCTGCTGTTAGATAAC\n+ATCATCCGTCCTAGCAGCTCACCGTTCGCCAGTCCAGTTTTACTCGTTAAAAAGAAAAAT\n+GGTTCTGATCGCCTTTGCGTGGATTACCGCGAACTAAATACAAACACAGTTGCAGAGAAA\n+TATCCCTTACCACTAATTAGTGACCAAATATCTAGGTTGCGTGGAGCAAGTTTCTTTAGT\n+TGCTTGGATATGGCCAGCGGGTTTCATCAGATACCTATTCACGCAAATTCAATTGAGCGC\n+ACGGCTTTTGTGACACCTGACGGCCAATTCGAATTTCTAACTATGCCCTTCGGGTTAAAG\n+AATGCCCCATCCGTGTTCCAGCGTGCAGTTATGAAAGCTTTGGGTGAGCTTGCCCACTCT\n+TACGTTATCGTTTATATGGACGATATAATGATTATCGCAGAAACAAAAGAAGAAGCTTTT\n+GTAAGGTTAAGGACAGTTTTGAAAATATTATCGCAGGCTGGGTTTTCTTTTAATATCGGA\n+AAATGTTCATTCCTGAAATCTTGCATTGAATATCTGGGGTTTGTGGTAAAAGAGGGCGAA\n+ATAAGACCAAATCCATCTAAGATAAAAGCATTAGTCGCTTTACCGCCTCCGCAGTCTGTT\n+ACCCAAGTAAGACAAATTATTGGCCTAGCCTCTTATTTTAGGCAGTTTGTGCCAAAGTTT\n+TCAGAAATCATGAAACCCTTATATAGACTGACCTGCAAAAACAAAATATTTGAATGGAAA\n+CTTGAACACGAACAAATTCGTCAAAAAGTCACTAAATTGCTTACAGATGAGCCCGTCCTT\n+GTTATCTTCGATCCTCGGCATCCCATTGAACTGCATACAGATGCCAGTATGGATGGCTAC\n+GGAGCAATTCTACTCCACAAAATAGATAATAAACGTCGTGTAGTTGAGTATTACAGCAAA\n+CAAACATCCTTGACGGAATCTCGATATCATTCG
TACGAGCTTGAAACTTTAGCTGTGTAT\n+AACTCCATGAGACACTTTCGTCACTATTTACATGGGCGAATTTGTTGTTTTTACAGACTG\n+TAATTCCCTAAAAGCTACTCGCAACAAGACTGAACTAACGCCGAGAGTACACCGTTGGTG\n+GGCATATATGCAGTCCTTCGACTTTGACTAGAATGACTTAGACTTAGAATATAGACCTGG\n+TGCCATAATGGCACATGTTGATTTCTTGTCACGCAATCCACTGCCATCTGCTCGGGTTAT\n+TACTGGTGAGGAAGAAAAACATGTTCTATTGGCCAAAATAACGGACAACTGGTTACTTGC\n+AGAACAGCAAAAGGATTCAGAGATTTCCACGATTGTTGTTAAAATACAGAACAATGAATT\n+GGGTGAGAGCTCGGCAAAAAGTTATGAATTACGCTCGAAAATGCTTTTTCGCAAAATTCA\n+AAGGAACGGTAAAACTCGTTGCCTGCCAGTTGCCCCCAGATCATTCAGATGGTCAGTAGT\n+GAACCAGGTCCATGAAGCAGTTGTACATTTGGGTGGGAAAAGACTTTAGACAAAATGTAC\n+GAATTTTACTGGTTTGAGAACATGGCCAAATATGTTCGTAAGTTCGTTGATAATTGCATT\n+ACGTGTAAGTTAACTAAGCCTCCGTCAGGAAAATTGCCAATCGAACTCCACCCCATACCA\n+AAAGTAGAAATTCCATGGCTATAAGTTGTACGACAAATCGCATAACGAAAGCCAGTCCTC\n+TTGAATTACTAATCGGAAAAGAATGTAGACCATTTAATATGTTACCAATATGTGAACAAG\n+TTAATAAAGTCGATGTAAATATTATAAGAAATATCGCGAGAGAAAATATTAAGAAGAACG\n+CCTTGTATGAAAAAACTAGATTCGATAAGCACAAAGCCAAATTTGATAACTTTGGTGTTG\n+GCGATTATGTTTTACTTAAGAACGAAGAAAGGCACCAAACAAAATTAGACCAAAAATATA\n+AAGGACCTTTCCTCGTGACAGAGGTACTTAAGGGAGATCGTTATATTTTAAAATCTTTAA\n+CTAATAAGCGGACTTATAAGTACCCACATGAAGCTTTGCGCAGTATGCCAACAGAGGAGA\n+TCCCCAAAGAGTTAGATCTATGTGACGATCAAGAAAACGTTGAAAGAGACGTTAGAAATC\n+CCTTGGTGGATTCCAATGTGGATGAAAACGTCGAAAGAGACGTTAGAAATCCCTTGGTGG\n+ATTCCAATGGGGATGAAAACGTTGAAAGAGACGTTAGAAATCCGTTGGTGGATGCCAATG\n+TGAGCGAAAAGTTACTGAGTTGTTTGAAGACTCAAGTGAATGAGAGGCATTGATGGATTT\n+CAATGCGAGATTGGGGACACATGCAACGTCGCCAAGTTGCCAGTGCTAGTAGGTACAAGT\n+GTTACTGTGTTGACTTATTTGATGTCTGGTGACTGGCGGCGTGGCGGGTTGAATTGTCCT\n+AGTGTGTTGCTAATAATAACAAACGATCTTCTTGGTACTTCTGTCACTCGAGTTGGTCGA\n+TAACAAGAAAAATAATAATAATAATTACGTTTAATGTTATCTTTCTAGATTAAGCTTGTT\n+TAATTTCAAAACTTATATTACACACGAGGACGTGTGCTGGTCAGGAAGGCCGTGTCGCAT\n+CATTATTAGTCTTATTTTTATTTTCTATGTTCCATCTCTAATAAACATGTCATCTCTATT\n+AAATAAAATTCGTATCGAGCTGTTCTTGTCTTCGTTTCTCTTTGATCGCTGTTCGCTGTG\n+TTCCGTTATGCGAGTTTAACGGGTTTTGCTCTGTTCTACATAGTCTCGGTTCGACGATGC\n+GTTAGAGTGAGACAAATGCTTGTCCTGTGGTGAGTTCGGACCAGCATGTATCAAGCGAGA\n+TAGAGCGATGTTGAAAT
GTACACGGGGCACTTATGTTTGAAAACTCTGAGAAAGCGGACG\n+CGTGAATATGTCGCAACCGAGGAAGTGTACGACTCGCGGGCGGAGCGCGGCAACAGAGGA\n+CCCCGAATCAGTTAACTTCCCGACA\n'
b
diff -r 000000000000 -r e7e7785e41d0 test-data/dme_miR21_hairpin.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dme_miR21_hairpin.fa Mon Sep 11 18:27:40 2017 -0400
b
b'@@ -0,0 +1,774 @@\n+>dme-mir-1 MI0000116 Drosophila melanogaster miR-1 stem-loop\n+TTCAGCCTTTGAGAGTTCCATGCTTCCTTGCATTCAATAGTTATATTCAAGCATATGGAA\n+TGTAAAGAAGTATGGAGCGAAATCTGGCGAG\n+>dme-mir-2a-1 MI0000117 Drosophila melanogaster miR-2a-1 stem-loop\n+GCTGGGCTCTCAAAGTGGTTGTGAAATGCATTTCCGCTTTGCGCGGCATATCACAGCCAG\n+CTTTGATGAGCTTAGC\n+>dme-mir-2a-2 MI0000118 Drosophila melanogaster miR-2a-2 stem-loop\n+ATCTAAGCCTCATCAAGTGGTTGTGATATGGATACCCAACGCATATCACAGCCAGCTTTG\n+ATGAGCTAGGAT\n+>dme-mir-2b-1 MI0000119 Drosophila melanogaster miR-2b-1 stem-loop\n+CTTCAACTGTCTTCAAAGTGGCAGTGACATGTTGTCAACAATATTCATATCACAGCCAGC\n+TTTGAGGAGCGTTGCGG\n+>dme-mir-2b-2 MI0000120 Drosophila melanogaster miR-2b-2 stem-loop\n+TTGTGTCATTCTTCAAAGTGGTTGTGAAATGTTTGCCTTTTTATGCCTATTCATATCACA\n+GCCAGCTTTGAGGAGCGACGCGA\n+>dme-mir-3 MI0000121 Drosophila melanogaster miR-3 stem-loop\n+GATCCTGGGATGCATCTTGTGCAGTTATGTTTCAATCTCACATCACTGGGCAAAGTGTGT\n+CTCAAGATC\n+>dme-mir-4 MI0000122 Drosophila melanogaster miR-4 stem-loop\n+TTGCAATTAGTTTCTTTGGTCGTCCAGCCTTAGGTGATTTTTCCGGTCATAAAGCTAGAC\n+AACCATTGAAGTTCGTTGTGG\n+>dme-mir-5 MI0000123 Drosophila melanogaster miR-5 stem-loop\n+GCTAAAAGGAACGATCGTTGTGATATGAGTTGTTTCCTAACATATCACAGTGATTTTCCT\n+TTATAACGC\n+>dme-mir-6-1 MI0000124 Drosophila melanogaster miR-6-1 stem-loop\n+TTTAATGTAGAGGGAATAGTTGCTGTGCTGTAAGTTAATATACCATATCTATATCACAGT\n+GGCTGTTCTTTTTGTACCTAAA\n+>dme-mir-6-2 MI0000125 Drosophila melanogaster miR-6-2 stem-loop\n+TAACCCAAGGGAACTTCTGCTGCTGATATATTATTGAAAAACTACTATATCACAGTGGCT\n+GTTCTTTTTGGTTG\n+>dme-mir-6-3 MI0000126 Drosophila melanogaster miR-6-3 stem-loop\n+CAAAAAGAAGGGAACGGTTGCTGATGATGTAGTTTGAAACTCTCACAATTTATATCACAG\n+TGGCTGTTCTTTTTTGTTTG\n+>dme-mir-7 MI0000127 Drosophila melanogaster miR-7 stem-loop\n+GAGTGCATTCCGTATGGAAGACTAGTGATTTTGTTGTTTGGTCTTTGGTAATAACAATAA\n+ATCCCTTGTCTTCTTACGGCGTGCATTT\n+>dme-mir-8 MI0000128 Drosophila melanogaster miR-8 stem-loop\n+AAGGACATCTGTTCACATCTTACCGGGCAGCATTAGATCCTTTTTATAACTCTAATACTG\n+TCAGGTAAAGATGTCGTCCGTGTCCTT\n+>dme-mir-9a 
MI0000129 Drosophila melanogaster miR-9a stem-loop\n+GCTATGTTGTCTTTGGTTATCTAGCTGTATGAGTGATAAATAACGTCATAAAGCTAGCTT\n+ACCGAAGTTAATATTAGC\n+>dme-mir-10 MI0000130 Drosophila melanogaster miR-10 stem-loop\n+CCACGTCTACCCTGTAGATCCGAATTTGTTTTATACTAGCTTTAAGGACAAATTCGGTTC\n+TAGAGAGGTTTGTGTGG\n+>dme-mir-11 MI0000131 Drosophila melanogaster miR-11 stem-loop\n+GCACTTGTCAAGAACTTTCTCTGTGACCCGCGTGTACTTAAAAGCCGCATCACAGTCTGA\n+GTTCTTGCTGAGTGC\n+>dme-mir-12 MI0000132 Drosophila melanogaster miR-12 stem-loop\n+TACGGTTGAGTATTACATCAGGTACTGGTGTGCCTTAAATCCAACAACCAGTACTTATGT\n+CATACTACGCCGTG\n+>dme-mir-13a MI0000133 Drosophila melanogaster miR-13a stem-loop\n+TACGTAACTCCTCAAAGGGTTGTGAAATGTCGACTATTATCTACTCATATCACAGCCATT\n+TTGATGAGTTTCGTG\n+>dme-mir-13b-1 MI0000134 Drosophila melanogaster miR-13b-1 stem-loop\n+CCATGTCGTTAAAATGTTTGTGAACTTATGTATTCACAATCATATCACAGCCATTTTGAC\n+GAGTTTGG\n+>dme-mir-13b-2 MI0000135 Drosophila melanogaster miR-13b-2 stem-loop\n+TATTAACGCGTCAAAATGACTGTGAGCTATGTGGATTTGACTTCATATCACAGCCATTTT\n+GACGAGTTTG\n+>dme-mir-14 MI0000136 Drosophila melanogaster miR-14 stem-loop\n+TGTGGGAGCGAGACGGGGACTCACTGTGCTTATTAAATAGTCAGTCTTTTTCTCTCTCCT\n+ATA\n+>dme-mir-263a MI0000343 Drosophila melanogaster miR-263a stem-loop\n+TAGATCTCGGCACAGTTAATGGCACTGGAAGAATTCACGGGGTAATTTTTATACAACCCG\n+TGATCTCTTAGTGGCATCTATGGTGCGAGAATAA\n+>dme-mir-184 MI0000354 Drosophila melanogaster miR-184 stem-loop\n+GGTTGGCCGGTGCATTCGTACCCTTATCATTCTCTCGCCCCGTGTGCACTTAAAGACAAC\n+TGGACGGAGAACTGATAAGGGCTCGTATCACCAATTCATC\n+>dme-mir-274 MI0000355 Drosophila melanogaster miR-274 stem-loop\n+TCCTGTGTTGCAGTTTCGTTTTGTGACCGACACTAACGGGTAATTGTTTGGCCGCCAGGA\n+TTACTCGTTTTTGCGATCACAAATTATGAAATTGCAGCAA\n+>dme-mir-275 MI0000356 Drosophila melanogaster miR-275 stem-loop\n+TGTAAAGTCTCCTACCTTGCGCGCTAATCAGTGACCGGGGCTGGTTTTTTATATACAGTC\n+AGGTACCTGAAGTAGCGCGCGTGGTGGCAGACATATAT\n+>dme-mir-92a MI0000357 Drosophila melanogaster miR-92a 
stem-loop\n+AATATGAATTTCCCGTAGGACGGGAAGGTGTCAACGTTTTGCATTTCGAATAAACATTGC\n+ACTTGTCCCGGCCTATGGGCGGTTTGTAATAAACA\n+>dme-mir-219 MI0000358 Drosophila melanogaster miR-219 stem-loop\n+TAATTCGATTTTTA'..b'A\n+GGAATACATGTGCCTGTATATATATTCGGTAAAATCACATCTTTGAATT\n+>dme-mir-4980 MI0017766 Drosophila melanogaster miR-4980 stem-loop\n+GAAGAGGCTGAGAGCGGGAAAGTGAGAAGTAGAGAGCGCAGATAGGGAGTGGGAGCTTAT\n+ACCGTTATAAAGCCAACTTCCGTTCTGCGTTTCGTTCCAACCCCCCACCTCTCTTTCCCT\n+CT\n+>dme-mir-4981 MI0017767 Drosophila melanogaster miR-4981 stem-loop\n+TGGCCACGTGCCCGCAAGACGCCTTCGGCCGGCGCAAAGTTTCAATTTGATATTCCTTGG\n+CCGGTCGAGCAGCGTCTTGGGGTTGTTCGTGGATT\n+>dme-mir-4982 MI0017768 Drosophila melanogaster miR-4982 stem-loop\n+CCAACTTTGCTGGCATTCGGTGGCCAATAATGCAATTGCATGTCCTTGTTGCTGTTGCAA\n+TGTTGCAGCTGGAATTGCAATTGCATTTCTAGTTGCCGCTGCTGCTGCTCGAGTGGCAA\n+>dme-mir-4983 MI0017769 Drosophila melanogaster miR-4983 stem-loop\n+GGCTGGCTGTCTTGACCCACTTGCTCGTTTGCATTTCTGAAAAATGTATACGCAAAAACC\n+AAAATCAGAAATTCTTTTGAGCAAGTGTGTCATGAAATTGCCATTCGT\n+>dme-mir-4984 MI0017770 Drosophila melanogaster miR-4984 stem-loop\n+CTTCGCTCGAGGTGTGAAAACCTTTTGGCCAGCGAATACGCCTCGGATCCAATCACGTTT\n+CAAGATCGAGGTGAATTCTTTGACGTATTCGCTGGCCAAATGACACGTTCGCCATGTTTT\n+GCG\n+>dme-mir-4985 MI0017771 Drosophila melanogaster miR-4985 stem-loop\n+TTGGGGGCGCTGCACTGGCATTGAAAAGTGAATTACATTGATCGTGACATGGGAATGGAA\n+AATGTCGCCGAGCCATGTAAATCGTTTGGCCTACCCGTCTA\n+>dme-mir-4986 MI0017772 Drosophila melanogaster miR-4986 stem-loop\n+TTCTGCCGCTTTTGCTGTGGCTTCTCTGCATGGGATTCCCCATTCTGCATGGCGCCGATC\n+TCTGCCAGCCCATCGGATGGCGGAACTTCCAGTGCAGCGAGGTCGCTTCTCTGCAGGATC\n+TG\n+>dme-mir-4987 MI0017773 Drosophila melanogaster miR-4987 stem-loop\n+GCGGTGCCGTTGATGATGACACAGCGCGCTTGCAACAGCGTGCGGCACGATTCTCACAGC\n+AGGGCTCCAGCTCGGCCAAAAAATCGGTCGTCGCCATTGCAAGCTCACCGTTTGGTCTCA\n+CCACGGCCAAGA\n+>dme-mir-9369 MI0028918 Drosophila melanogaster miR-9369 stem-loop\n+AGCGAGAGGGTATTATGTATTCATATTTGTAATATCATGATACATATTCTCCTTTCGCTC\n+TA\n+>dme-mir-9370 MI0028919 Drosophila melanogaster miR-9370 
stem-loop\n+TTTGGGTTGCCGGTACCGGGTGGTTTCCCCGATATCCACGACCCATACCA\n+>dme-mir-9371 MI0028920 Drosophila melanogaster miR-9371 stem-loop\n+AAATCACTTTGCTTGGAATTCACATTGATGTACGTAAGAATTCATCACGAAGTGGTTTCT\n+G\n+>dme-mir-9372 MI0028921 Drosophila melanogaster miR-9372 stem-loop\n+TAAAAGCACATTTCGCCAGCACGTGTACTTTCCACCTCGTCTGTGATTTTGGTTTTTAAC\n+G\n+>dme-mir-9373 MI0028922 Drosophila melanogaster miR-9373 stem-loop\n+AGTTTGTGAGGAGCGTTCGGCGGATACACATCGCACCCATCGCTCTTGGCCAGCTCGTC\n+>dme-mir-9374 MI0028923 Drosophila melanogaster miR-9374 stem-loop\n+CGTGCAATAATTTCCTCGATTGGCATCAAGTGGCTTCCAGTCGGGTACATATTGCACAAG\n+A\n+>dme-mir-9375 MI0028924 Drosophila melanogaster miR-9375 stem-loop\n+CCGAGTATATGGAATTTCTGTTTTGCCTAGTATGAACCGAAACTCGATATAATTCAGAA\n+>dme-mir-9376 MI0028925 Drosophila melanogaster miR-9376 stem-loop\n+ATAGCCAGCATGGCCAAAGCGCTGTGCTTAAGTGGTCCAAGCGACCGAAAGCACTTTGAC\n+CATGCGGTGGTGGCCA\n+>dme-mir-9377 MI0028926 Drosophila melanogaster miR-9377 stem-loop\n+CGCTCTTTCTCTCTATCTGGCCTGGCCTGGCTTGATGGGGAGAAGGAGAGGG\n+>dme-mir-9378 MI0028927 Drosophila melanogaster miR-9378 stem-loop\n+AGTGGAGTGAGACCTCGAGTTCGGGCAAATCTGATAGCCGAACGGGAGTTTCCTCCATTG\n+GG\n+>dme-mir-9379 MI0028928 Drosophila melanogaster miR-9379 stem-loop\n+CGCATGTCAGGTGGTAATCCATTAGTATAAACATGGTATGGCCACTTGACAGTCGGC\n+>dme-mir-9380 MI0028929 Drosophila melanogaster miR-9380 stem-loop\n+GCTCCTGCTGCATCTGTTGGCGATAGCGCTCGTCACTCTTGGCCGTGTACTCCGCCTCCA\n+ACAGATCCAGCAGCAGA\n+>dme-mir-9381 MI0028930 Drosophila melanogaster miR-9381 stem-loop\n+GCCAGGGAAGGGTCGAGTGCGGATCCCTTGCCCCAGCGGCTATTCGCCTGCGCACTCGGT\n+CCATCCCTGGCTCA\n+>dme-mir-9382 MI0028931 Drosophila melanogaster miR-9382 stem-loop\n+ATCACAGTGTGGCTGAGGGATTCCACTTTCCTTATAACTTTTGCCGATCCCCGGCGCCAC\n+TGTGATCGT\n+>dme-mir-9383 MI0028932 Drosophila melanogaster miR-9383 stem-loop\n+GGGTGCAGATCAAGTGCGAGCTGCGCATTCTCTCGCAGTTCGCCTTCAATCTGAACCCCG\n+A\n+>dme-mir-9384 MI0028933 Drosophila melanogaster miR-9384 
stem-loop\n+AGGCATAATTCAACTCACACGTCTACAGGTACATATGTGTGTGTTCGGTTATGTACTTTG\n+>dme-mir-9385 MI0028934 Drosophila melanogaster miR-9385 stem-loop\n+ACAGTGTATGCAAATGATGAATGTGCCATATCAGAAACTATTCCTCATTGCTATACCTGT\n+GGT\n+>dme-mir-9388 MI0028938 Drosophila melanogaster miR-9388 stem-loop\n+CAAGTATTTTGGTACGTATGTATGTATGTACATACATGTGTATATGGTACATTCATGTAG\n+TACATACATACATATGTATGCAAGTACGTACG\n'
b
diff -r 000000000000 -r e7e7785e41d0 test-data/sample.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample.fastq Mon Sep 11 18:27:40 2017 -0400
b
b'@@ -0,0 +1,12000 @@\n+@HWI-1\n+GAGGTACTTGAAAATGACAAATGAAC\n++\n+BBBBBG1FGGGDFGDGGGGGGGGGGG\n+@HWI-2\n+TCACTGGGCTTTGTTTATCTC\n++\n+BBB@ACF@FGGGGGGGGGEFG\n+@HWI-3\n+TTACGGCATGCATTGTGAAACTAAAAA\n++\n+ABB0ACDGGE1EGGGGGCGEDGGFGGG\n+@HWI-4\n+TCCGAAGATTGCAGGTTCGTGTCCT\n++\n+BCB@B>E/E11=E1=FFF<F=C::F\n+@HWI-5\n+CTTAGCAAAGCTCCAGTCCTG\n++\n+ABCCBFFGGGGGGGGGGGGGF\n+@HWI-6\n+CTCTCTCAGTCGCTTCCCGG\n++\n+AAA001ED111//;;/@@F<\n+@HWI-7\n+TATTGCACATTCACCGGCCTGA\n++\n+A=3=BE@FGG1=FGGGGGGGGF\n+@HWI-8\n+TCACTGGGCTTTGTTTATCTCA\n++\n+CCCCCGGGFGGGGGGGGGGGGG\n+@HWI-9\n+AGCCCGAGACCTGGACTTGAACCACCTG\n++\n+BCCCCGGGGGGGGGGGGGGGGGGGGGGG\n+@HWI-10\n+TCATCTTCGGAAAGTAGTACTCGGAGA\n++\n+CCCCCGGGGGGGGGGGGGGGGGGGGGG\n+@HWI-11\n+TCTGAGCCTCAAATTCGCTTAAGCGGT\n++\n+B@BBCGGGGGFG1@FGGGGGGGGCGDG\n+@HWI-12\n+GATAAGCAAAGAACTTTTT\n++\n+CCCCCGCGGGGGGGGGGGG\n+@HWI-13\n+TCTAAATCAAGAATTCCATTAGTA\n++\n+CCCCCGGGGGGGGGGGGGGGGGGG\n+@HWI-14\n+ACCAGATTATGGAGGACA\n++\n+CCCCCGGGGGGCGGGGGG\n+@HWI-15\n+TGAGAGGACAGCTCCGATGGCC\n++\n+BBBBAFGGGGGGGGGGGGGGGG\n+@HWI-16\n+TCACTGGGCTTTGTTTATCTC\n++\n+CCBBCFGGGGGGGGGGGGGGG\n+@HWI-17\n+GAATTCCACCACGTTCCCGTGG\n++\n+BBBBBGGGGGGGGGGGGGGEGG\n+@HWI-18\n+AAGTGCTGATAGATTTAT\n++\n+BBCCBGGGGGGGGGGGGG\n+@HWI-19\n+CTGGCTGAGACCTTCACATATAGG\n++\n+BBABAGBGGGGGFGGGGGGFFGGG\n+@HWI-20\n+TTTTAGAGATTTCTTCAGGCCTGAGA\n++\n+CCCCCGGGEGGGGGGGGGGGGGGGGG\n+@HWI-21\n+TCACTGGGCTTTGTTTATCTCA\n++\n+BBB?0ECBGFGCGGGGGGGGGG\n+@HWI-22\n+TCTTTGATGATTTTAGCTGTAA\n++\n+BBBBBG1CFG@GGG>FGGGGGC\n+@HWI-23\n+TCCAGGATTTGGGAAACATTAACTAGCG\n++\n+BCBCBGGGGGGGF@BFGGGGCGGGCGGG\n+@HWI-24\n+TATTCCTGGATCACCGAATCGTATCTC\n++\n+CCCCCGGGGGGGGGGGGGGGGGGGGGG\n+@HWI-25\n+TTATTCTTTCCGTCCATCCGTA\n++\n+BBBBBFGGGGGGGGGGGGGGGG\n+@HWI-26\n+TTCCAATTCCGTAACCTGTTGAGTATC\n++\n+CCCCCGGGGGGGGGGGGGGGGGGGGGG\n+@HWI-27\n+CGCAAAAGTTGACCGAACTTG\n++\n+CCCCCEGGGGGGGGGGGGGGG\n+@HWI-28\n+TTAGGTAAAGCCAGAGAAGAAGAGCTC\n++\n+CCCCCGGGGGGGGGGGFGGGGGGGGGG\n+@HWI-29\n+TTTCGAGACCACTCTAATCCAT\n++\n+CCCBBGGGGGGGGGGGGGGGGG\n+@HWI-30\n+ACTACTGTGGAGACATTTTTT\n++\n+CCCCCGGGGGG
GGFGGGGGGG\n+@HWI-31\n+TATTAATAAGCTTGTTAGGTTGCAT\n++\n+CCCCCGGGGGGGGGGGGGGGGGGGG\n+@HWI-32\n+TATATAAGATCAAATCTGTTACGGCCCT\n++\n+CCCCCGGGGGGGGGGGGGGGGGGGGGGG\n+@HWI-33\n+TGAGATCATTTTGAAAGCTGATT\n++\n+CCCCCGGGGGGGGGGGGGGGGGG\n+@HWI-34\n+TCGCACCAGAGGAGATCGCGCAGGAAA\n++\n+CCCCCGGGGGGGGGFGGGGGGGGGGGG\n+@HWI-35\n+CAGCAGTCCTCGGAACCAGGC\n++\n+CCCCCFGGGGGGGGGGGGGGG\n+@HWI-36\n+CCATGTCCCGTTGCTATCGACTAGA\n++\n+BBBBCGGGGGGGGGGGGGGGGGGGG\n+@HWI-37\n+TCCTACGAATCGCTGTATGAACAG\n++\n+B@@0A=F>EBFGGGGGGGECC11C\n+@HWI-38\n+ATAACTGTGGTAATTCTAGAGCTAATAC\n++\n+BCBCCGGGGGGGGGGGGGGGGGGGGGGG\n+@HWI-39\n+TCTATAAAACTTTGTAAATTTTCGTGCC\n++\n+BCCCCGGGGGGGGGGGGGGGGGGGGGGG\n+@HWI-40\n+TAAGGATGGATTGCAATGCACACGTGC\n++\n+CCCCCGGGGGGGGGGGGGGDGGGGCGG\n+@HWI-41\n+TTCATGAGATCAACTTCGTTTATTTTG\n++\n+ABABBC1GGGDGGGGGGGGGGGGGGGG\n+@HWI-42\n+TCACTGGGCTTTGTTTATCTC\n++\n+B@B001;?FGGGGGGGGGGGG\n+@HWI-43\n+TTCGAGAACTTTACAGGATAGAGAACT\n++\n+CCCCCGGGGGGGGGGGGGFGGGGGGGG\n+@HWI-44\n+ATCTTTATCTCTCTTGGCTCGGCC\n++\n+CCCCCGGGGGGGGGGGGGGGGGGG\n+@HWI-45\n+CATTGCACTTGTCCCGGCCTAT\n++\n+BCCCCFGGGGGGGGGGGGGGGG\n+@HWI-46\n+TGAGATCATTTTGAAAGCTGATT\n++\n+BAABAGGFGG11>;FGGGGCGGG\n+@HWI-47\n+AAGAAGACCCTTTTGAGCTTGACT\n++\n+BBBBBGGGGGGGGGGGDGGGGGGG\n+@HWI-48\n+TAAAAACGTAGACAGACATCACCTGT\n++\n+CCBCCDGGGGGGGGGGGGGGGGGGGG\n+@HWI-49\n+TATTGCACTTGAGACGGCCTGA\n++\n+3AAABEG@DFGGDGC>><EGGB\n+@HWI-50\n+TTATTAGAAAATCTGTGCACATAAA\n++\n+BB@A0@;1FGGGE@@>1BDG1FGG@\n+@HWI-51\n+CAGACGATGAAACCTTTGATAGCTAGG\n++\n+CCCCBGGGGGGGGGGGGGGGGGGGGGG\n+@HWI-52\n+TCAAACTGTGTTCAAAAA\n++\n+CCCBCGGGGGGGGGGGGG\n+@HWI-53\n+TGAACACAGCTGGTGGTATCC\n++\n+CBCCCGGGGGGGGGGGGGGGG\n+@HWI-54\n+TGCGACCACAGTAAGTATCCTGAAAG\n++\n+BAABBGGGGGGGGGGGGGGGGGEGGG\n+@HWI-55\n+TAAATGTAGTGTACCTAGAGACTGAAT\n++\n+=BBBBFGGGDGFGGFGGGGEGGGEGGG\n+@HWI-56\n+TATTGCACTTGAGACGGCCTG\n++\n+BBAB=1?FGGG>FDGGGGGGG\n+@HWI-57\n+TAATACTGTCAGGTAAAGATGTC\n++\n+BBBBAFGGGGGGGGGGGGGGGGG\n+@HWI-58\n+TCATTGGAGCAGACGTATATACTC\n++\n+CCCCCGGGGGGGGGGGGGGGGGGG\n+@HWI-59\n+TAACCGTGCAGAAAATTGTAAACTGAA\n++\n+A=BB@;FGGBG>FDB
@EGGF>F>GB@D\n+@HWI-60\n+TATTGCACTTGAGACGGCCTGA\n++\n+CCCCBGGGGGGCGGGGDGGGGG\n+@HWI-61\n+ACCGTCGTTAAGAACAATTAGGCTGT\n++\n+BBBCBFGGGGGGGGGGGGGGGGGGGG\n+@HWI-62\n+TTTCGGATTCAATGTTATTATTATT\n++\n+CCCCCGGGGGGGGGGGGFGGGGGGG\n+@HWI-63\n+TTTCCTGGATTTCGAGCTGA'..b'GCCAGCCTG\n++\n+ABB0:FDGGGGGGGGEGGGGGGGG\n+@HWI-2940\n+TATTGCACTTGAGACGGCCTGA\n++\n+BBBCBGCBGCG@F>GGGGDGGB\n+@HWI-2941\n+CGAGAAAGCAACTCACCGTACTG\n++\n+BBCBBGGGGGGGGGGGGGGGGGG\n+@HWI-2942\n+TCACGTTTTCTGCGTATCGGCGGAGG\n++\n+AA:0AGGGG1DDGGGGGGGGGGG>GC\n+@HWI-2943\n+TCACATTGTCGCTGAGAACGTATG\n++\n+BBBBBGEGGF;>FGGBGFGGBGG1\n+@HWI-2944\n+TGTGACTCGGAGAATACAAGATAAAGAA\n++\n+CBCCCGGGGGGGGGGGGGGGGGGGGGGG\n+@HWI-2945\n+TATCATAATAATCTGGTTGGTTATG\n++\n+CCCCCGGGGGGGGGGGGGGGGGGGG\n+@HWI-2946\n+TCACTGGGCTTTGTTTATCTCA\n++\n+CCCCCGGGGGGGGGGGGGGGGG\n+@HWI-2947\n+TAATGAGATCCCTCGGTTGCCTTCT\n++\n+CCCCCGGFGGGGGGGGGGGGGGGGG\n+@HWI-2948\n+TTTCATCGAGATATTAAGGACTGGGGA\n++\n+BCCCCGGGGGFB@DEGGC1FGG@0=CF\n+@HWI-2949\n+TAGAAATTTCTCTGTTGTTAGATAG\n++\n+BABBBGGCGGGGGGGEGGGGGGGGG\n+@HWI-2950\n+AACCAATATGTTACGACTTA\n++\n+CCBBBGGE1?F;@GCFCGDG\n+@HWI-2951\n+CTACATATGGTTGAGGGTTGTA\n++\n+CCCCA1FE;1E>FGGGGFGGGE\n+@HWI-2952\n+TTCTTGACAACGACTATGCGACTG\n++\n+B@BBBFGGGGGG=CGGGGGGBGGG\n+@HWI-2953\n+TATTGCACATTCACCGGCCTGA\n++\n+BBBBCFEG>;11EFGDGGGGGF\n+@HWI-2954\n+TATCACAGCCAGCTTTGAGGAG\n++\n+B@ABBG1F>GGGGCDCGC=11/\n+@HWI-2955\n+TTCGCGCTCTGACTATTATAAA\n++\n+CCBCCGGGGGGGGGGGGGGGGG\n+@HWI-2956\n+AATTGCACTAGTCCCGGCCTGC\n++\n+BBCCCGGGGGGGGGGGGGGGGG\n+@HWI-2957\n+GATTATACCGTCGAATGTTGTAAGA\n++\n+BBCCCGGGGGGGGGGGGGGGGGGGG\n+@HWI-2958\n+TTGAACCGCGACAATATAAACATT\n++\n+CCBCCGGGGGGGGGGGGGGGGGGG\n+@HWI-2959\n+TATTGCACTTGAGACGGCCTGA\n++\n+BA:AA;FCFGGCGGGGGGGGGG\n+@HWI-2960\n+TATTGCACATTCACCGGCCTGA\n++\n+BBCC@GGGGFGGGGGGGGGGGG\n+@HWI-2961\n+TATTATCATCTCGTTCGTCCGTCGCCA\n++\n+?BB@BGGGGGGGGGGGGGGGGGGGGGG\n+@HWI-2962\n+ATCGGATTCTTTTACCAACTTTGT\n++\n+BBCBCGGGGGGGGGGGGGGGGGGG\n+@HWI-2963\n+TAATACTGTCAGGTAAAGATGTC\n++\n+CCCCCGGGGGGGGGGGGGGGGGG\n+@HWI-2964\n+TACTAGCCCTCTGCTGACAAAGG\n
++\n+BCBBBGGGGGGGGGGGGGGGGGG\n+@HWI-2965\n+TCTCCGCAAAGCCTGAGGATAGGAAA\n++\n+CCCCCGGGGGGGGGGGGGGGGGGGGG\n+@HWI-2966\n+CTCTAGATTGCTGCGCTTGCC\n++\n+BCCCCGGGGGGGGGGGGGGGG\n+@HWI-2967\n+GGCTCCAAAATCGAAAAGTTGCGACT\n++\n+==AB0;FGGDD>FGFGDCGG>BEGG<\n+@HWI-2968\n+TGGACGGAGAACTGATAAGGGC\n++\n+@B@?AEEGG/F>FDGGGFGGGG\n+@HWI-2969\n+TTTTGATGGTAGTATCTAGGACT\n++\n+BBBCCBECD11>=CFGG11C1;C\n+@HWI-2970\n+AGAAGGTTTACGGATTTTTTT\n++\n+BBBBBGGGGDGGGGGGGGGGG\n+@HWI-2971\n+TCACTGGGCTTTGTTTATCTC\n++\n+CBCBBFBGG@F@GGGBG>GGG\n+@HWI-2972\n+TAGTTTCGTTGGACTGGAGAGC\n++\n+BBBBBFGGGGGGEGGGGGGGGG\n+@HWI-2973\n+TCACTGGGCTTTGTTTATCTCA\n++\n+CCCBBEFGG>GGGGGBEGGGGG\n+@HWI-2974\n+TATTAGTCATCGGTAGCCTGGACCT\n++\n+CCCCCFGGGGGGGGGGGGGGGGGGG\n+@HWI-2975\n+GAGTAGAACAGCTGAACTTCCG\n++\n+BCCBBGGFGGGGGGGGGGGGGG\n+@HWI-2976\n+TCACTGGGCTTTGTTTATCTCA\n++\n+CCCCCFGGGGGGGGGGGGGGGG\n+@HWI-2977\n+TATTGCACATTCACCGGCCTGAA\n++\n+CCCCCGGGGGGGGGGGGGGGGGG\n+@HWI-2978\n+TGACTAGATCCACACTCATTAA\n++\n+BBCCCGGGGGGGGGGEGGGGGG\n+@HWI-2979\n+TGCGCAGCTGGGATACGTTATGGAA\n++\n+B@BBBGDGGGBFBGGGGGGGGGGGG\n+@HWI-2980\n+TTGATTAAGATAGCTGCTCCGCCT\n++\n+AABA=EGGGGGGGGGGGGGGGGGG\n+@HWI-2981\n+TGAGATCATTTTGAAAGCTGATT\n++\n+CCCCCGGGGGGGGGGGGGGGGGG\n+@HWI-2982\n+TATCCAAAGAGCTGATTGTCATACTT\n++\n+CCCCCGGGGGGGGGGGGGGGGGGGGG\n+@HWI-2983\n+GAAAGAATAAACGCCGTTGGAAAA\n++\n+CBCCCGGGGGGGGGGGGGGGGGGG\n+@HWI-2984\n+TTATAGTCACCCGCTGCGATGAACC\n++\n+CCCCCGG1=>@;E/EGGGAGGCFDG\n+@HWI-2985\n+TGAACTGCAGGACACATGAAC\n++\n+CCCCCGGGGGGGGGGGGGGGG\n+@HWI-2986\n+AAGAGAGCTATCCGTCGACAGTC\n++\n+BBBCCGGGGGGGGGGGGGGGGGG\n+@HWI-2987\n+GGACAAATGACGATCTAGACAGTT\n++\n+CCBCCGGGGGGGGGGGGGGGGGGG\n+@HWI-2988\n+CAAGGTCTTCACCACTCCGCGGTCC\n++\n+CCCCCGGGGGGGGGGGGGGGGGGGG\n+@HWI-2989\n+TCACTGGGCTTTGTTTATCTC\n++\n+BBBBBGCFBGGG1FF>1CGFE\n+@HWI-2990\n+TAGGAACTTCATACCGTGCTCT\n++\n+CCCCCGGGGGGGGGGGGGGGGG\n+@HWI-2991\n+GTGCATTGTAGTCGCATTGTCT\n++\n+BCCCCGGGGGGGGGGGGGGGGG\n+@HWI-2992\n+ATTCACGCCGTACTTTGCAGACTCGAGC\n++\n+CCCCCGGGGGGGGGGGGGGGGGGGGGGD\n+@HWI-2993\n+TCACTGGGCTTTGTTTATCTCA\n++\n+CCCCCGGGGG
GGGGGGGGGGGG\n+@HWI-2994\n+TCCTCAGACATAGGAGAGAGCGGCC\n++\n+CCCCCGGGGGGGGGGGGGGGGGGGG\n+@HWI-2995\n+TACACGGACAGGATGGTTTTCCT\n++\n+BC@BBGGGGGGGGGGGGGGGGGG\n+@HWI-2996\n+AATTGCACTAGTCCCGGCCTGC\n++\n+:A=AA>@GGG>GFGGG@EGGGG\n+@HWI-2997\n+TCGCAACACAACACTGATCACACG\n++\n+CCCCCGGGGGGGGGGGGGGGGGGG\n+@HWI-2998\n+GATGACTTCAAATCCATGTTTGCTGC\n++\n+BBBBBGEGGGGGGGGGGGGGGGGGGG\n+@HWI-2999\n+CTACATATGGTTGAGGGTTGTA\n++\n+CCCCCGGGGGGGGGGGGGGGGG\n+@HWI-3000\n+TTTGAACCCTCTGTCGTAGAACAGC\n++\n+<3<001?;>FDBCFG@C@C/:BD11\n'
b
diff -r 000000000000 -r e7e7785e41d0 test-data/sample1.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample1.fa Mon Sep 11 18:27:40 2017 -0400
b
b'@@ -0,0 +1,50000 @@\n+>25001\n+TGGGTGTCCATCTTGTGACAGTACT\n+>25002\n+CGTGATGTGACGTAGAGGAA\n+>25003\n+TAAGACTATAATTGATGGAATGAACT\n+>25004\n+GATATGCTTCTCATGTAGGAATTGAGC\n+>25005\n+TCACTGGGCTTTGTTTATCTCA\n+>25006\n+TCTTGTAGGTGAGTCTCGGTGATCGA\n+>25007\n+TGAACACAGCTGGTGGTATCC\n+>25008\n+CGTCAATATGTTGCAGGAGTAAAGA\n+>25009\n+TTAATTGACTTTTAAATTGTTCTGGA\n+>25010\n+CCTGATATGTTGACGTAGTTGCTCGA\n+>25011\n+ATGTCATCGATGCGCAGATTTTTGG\n+>25012\n+ATTATTAAATCTTTGGATACCA\n+>25013\n+TCACTGGGCTTTGTTTATCTCA\n+>25014\n+GAACAGAATTCTTTGAAACGGGGTA\n+>25015\n+TATCACAGCCATTTTGACGAGTT\n+>25016\n+AACGGATAGAGCGAATTCTGAGTGGT\n+>25017\n+TGTGATGTGACGTAGTGGAACA\n+>25018\n+TGAAGATATTTATTAAATTAAAGTGG\n+>25019\n+ACTACATATGGTTGAGGGTTGTA\n+>25020\n+TAATTATCTTTGACCATTGTTTTGT\n+>25021\n+TCACTGGGCTTTGTTTATCTCA\n+>25022\n+TCACTGGGCTTTGTTTATCT\n+>25023\n+AGCGGACAACCTAGCCAGG\n+>25024\n+ATCGGTTGTTCAGTGGTAGAATGCT\n+>25025\n+GACAGCGAACTCAGGATTTGTGGA\n+>25026\n+ACCCGGGCGGGAACACCA\n+>25027\n+TGTGATGTGACGTAGTGGAAC\n+>25028\n+TCTTTGGTGATTTTAGCTGTAT\n+>25029\n+TACTGAAGGAATCTTCTTACATTTCCC\n+>25030\n+TTACGACAAGTCGAACAAAGCTGC\n+>25031\n+TGACTACTCCAGACACCTTGATATG\n+>25032\n+CTATCAGCCATCCTGAGC\n+>25033\n+TATCACAGCCATTTTGACGAGA\n+>25034\n+CGGTACTATTACTTTGAACA\n+>25035\n+ATTAATAGTATAGATACCA\n+>25036\n+ATATGAGGACTGGATGCACATTTTG\n+>25037\n+TAAGGTAAAGTAAGATTTCTCATTGGTG\n+>25038\n+AAACAAATTCTTTAATAAAACGA\n+>25039\n+TCACTGGGCTTTGTTTATCTCA\n+>25040\n+TAATGGACTTCGAAGTTGAAGCGGGC\n+>25041\n+TGTGATGTGACGTAGTGGAAC\n+>25042\n+TGGAAGACTAGTGATTTTGTTG\n+>25043\n+TTAGCTTTTCATTGTCGTGTAAGAGTT\n+>25044\n+GCTATTGATGCGAAGTCTTTGA\n+>25045\n+TTTAAGAAGACGGACTCGGTT\n+>25046\n+CGATTGATTTGCTCAGGATTGCTGA\n+>25047\n+CGGCGACTTCAAGTTCTGTGACG\n+>25048\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>25049\n+GCCTGAGAGCTCCGGAAGGACGTACGC\n+>25050\n+GGTTCTATTGTGTAATGG\n+>25051\n+ATCGGTGGTTCAGTGGTAGAATGCT\n+>25052\n+AAATCATCCGAACACATGGTCGAA\n+>25053\n+CACCGGAGCGTTGACCTGAGTTGGC\n+>25054\n+TTATTGTGCCACTGAGTCGACTATTAA\n+>25055\n+TTGTGTATTATGATTCTGATTCGTG\n+>25056\n+TCCGGCCAAATA
TGAACGACGTAAGG\n+>25057\n+TTGAGCTGTACTACGCTAATATGAGTG\n+>25058\n+TAATTGCATCTGACTTAGCACCACAAGA\n+>25059\n+GGGGATGTAGCTCAGTGGTAGAG\n+>25060\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>25061\n+TCACTGGGCTTTGTTTATCTC\n+>25062\n+TAAGATTGAAAATGACTGTGGAGT\n+>25063\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>25064\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>25065\n+AATTGTGGAACTTTCTTGCTAAA\n+>25066\n+TGTAAGCTCTGAAATAACATATT\n+>25067\n+TTGAAAAGCTTGCGCAGTTTCATGGGA\n+>25068\n+TGGTTTCCTTATTAAGTTTACGGTA\n+>25069\n+TCTTTGGTATTCTAGCTGTAGA\n+>25070\n+TACATACTGGCCAGCAGACCCAAGAA\n+>25071\n+TGATGCACAAACCTGGCAAGCCGGA\n+>25072\n+TGTGATGTGACGTAGTGGAACA\n+>25073\n+TATATTTCGGGTGTTTTGTGATTGTG\n+>25074\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>25075\n+TATCACAGCCATTTTGACGAGTT\n+>25076\n+TGCCGACCTGCCTGGGATTTGGGGTT\n+>25077\n+TCCTCGGATTTCGTTCTCTCAATT\n+>25078\n+TGTGATGTGACGTAGTGGAAC\n+>25079\n+GCATCGGTGGTTCAGTGG\n+>25080\n+TATTGCACATTCACCGGCCTGAAA\n+>25081\n+TGTGGCAATACGTTTGTTTAGGCGGC\n+>25082\n+TCTTTGGTGATTTTAGCTGTAT\n+>25083\n+TATCCACCCGAAATCAGTTTTTTGA\n+>25084\n+TCCGCAAATCCGTGAATCTGAATGACT\n+>25085\n+TAACGTACTCAGAACTGAACTAAT\n+>25086\n+TAACGGAGAGGCCTTACATAACGGGG\n+>25087\n+TTGGGGGTTGGACATAGTTAATGTT\n+>25088\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>25089\n+TGCTTGGACTACATATGGTTGAGGGTTGT\n+>25090\n+GGATCTAGTATACAAAATCGGAGGAGC\n+>25091\n+TTTTGTCTGAGGTTATTAAATATA\n+>25092\n+TCAAATGCAAATTGGATTGAGGAGA\n+>25093\n+TTTTTTTTGTTTTGAGGTTGATCAGA\n+>25094\n+TAAGTTGAGACCAAAAGTTAGATT\n+>25095\n+GTTCAATTCCCCGTCGGGGA\n+>25096\n+TTTTTCTGTTTATTTTCAATGAAACG\n+>25097\n+TTGGCGCTTCTCCTGGCGTGCCCGGTA\n+>25098\n+TAGTAGAAACAAACAGGCGACTCCATC\n+>25099\n+TGGACGGAGAACTGATAAGGGC\n+>25100\n+TGTGATGTGACGTAGTGGAACA\n+>25101\n+TCACTGGGCTTTGTTTATCTCA\n+>25102\n+TGAGTTTGTAGAGGAGTCCAGGAT\n+>25103\n+GATGGTATTTGATGATGAACTTGAA\n+>25104\n+TTGACTCCAACAAGTTCGCTC\n+>25105\n+GAGCGAATTCTGAGTGGT\n+>25106\n+CCTGCGTGATCTTGGCAACTCTGTTG\n+>25107\n+GGGAAAAACAATATCGTTTAGTGATT\n+>25108\n+TCTATTCCACTGGAACTTTGCACGTT\n+>25109\n+TAGATGTAATAGATTTGGTTTCCGA\n+>25110\n+TCTGATCTAAGAAATTGAGACGT
GGC\n+>25111\n+GCAGTCGTGGCCGAGCGGTTAAGGC\n+>25112\n+GCATCGGTGGTTCAGTGGTAGAATG\n+>25113\n+TCGATGCGGAGATTTTTGGACGGG\n+>25114\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>25115\n+ATCTCGGTAGAACCTCCA\n+>25116\n+AAATATCAGCTGGTAATTCTGGG\n+>25117\n+TAA'..b'CCACATGATTCGGCTT\n+>49886\n+ATACATTGTAGACGGTCTTACGGGA\n+>49887\n+TAAACTAATAGAGACAGGTAGAATC\n+>49888\n+TTGTTGCAATGTCTGACTGGGGTTCGT\n+>49889\n+TGCTTGGACTACACATGGTTGAGGGTTGTA\n+>49890\n+GGGTCAGGCGATGATGAATT\n+>49891\n+TGTGATGTGACGTAGTGGAACA\n+>49892\n+TTGGGATATTGTTGGAAATGATTTTT\n+>49893\n+TATTCGAGAATTTTGTGATTAGTGA\n+>49894\n+TTGTGGAATGTTCGTGTCGAA\n+>49895\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49896\n+TGGAAGACTAGTGATTTTGTTG\n+>49897\n+TGCGACGGCGGCGAACGTAGTA\n+>49898\n+TGTGATGTGACGTAGTGGAA\n+>49899\n+TGATGACGAGCGTTCTTTTAGA\n+>49900\n+TCAGGGTGACCACACATTTCAAGGA\n+>49901\n+TGGAATTTCGTTGTGTCGTCAGTGTGA\n+>49902\n+AAGGACCCGAGGGCTGCAACCTTTTC\n+>49903\n+TTAGATAACTGAAAGCAAGTACTGG\n+>49904\n+AAGAAGCCGTCGAGAGATATCGGA\n+>49905\n+TGTTATCGATCATTTTAGTTCGCTGA\n+>49906\n+TCCCATATTGTCTAGTGG\n+>49907\n+TACATGGTTGTCTTGTAGAGTTGACGC\n+>49908\n+TGAGCGGAGAACCAGAGTTGATGTG\n+>49909\n+TATAGGTCTGATTCTAAAATGGGTGA\n+>49910\n+GGACTGACTCGTGTAGTGTGCACT\n+>49911\n+TGTTTTTCGGCCTTATAAACGGGG\n+>49912\n+CCTAATAAGAATTGAGGGATCAGGA\n+>49913\n+TATTTGTGCTGCCTCCTCTGAAATCA\n+>49914\n+TGTTATGTTGCCAGTCTGAGTCGTCAG\n+>49915\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49916\n+CAATATCGTCAACATCCTCGAACG\n+>49917\n+TGTGATGTGACGTAGTGGAAC\n+>49918\n+TTTAAGCTGGTAGGTGTAGAAGCCT\n+>49919\n+TTCTTTCGGATTCTGAAGTAATTAAT\n+>49920\n+GAAGATGAAACTGTTCTGGACGGA\n+>49921\n+TCACTGGGCTTTGTTTATCTCA\n+>49922\n+TCAGGTACTTAGTGACTCTCAA\n+>49923\n+ATCGAAAAGATTCGCTGAAGTTGGGC\n+>49924\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49925\n+TAGATGTAAGAATAACTGTTTTGA\n+>49926\n+TAGCAACCAGGTCATCTTCAAACT\n+>49927\n+TTTATTGGAGAGGTTGATCCT\n+>49928\n+GAAGGGTTCGGGCTCAATTAGAGGGT\n+>49929\n+CATGGGTTCTGATGTGTTTTCACGA\n+>49930\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49931\n+TCCATTTTTGAACACAGTTTGATGGGA\n+>49932\n+TAACTGAAGTATCTGAGGATTGGATT\n+>49933\n+TTT
GAACTGGTCTGTGTTGAATTCAACC\n+>49934\n+GACGTTAGAAATCCGTTGGTGGA\n+>49935\n+TGCTTGGACTACATATGGTTGAGGG\n+>49936\n+TGGACTCGTTAGGTATGGATGTTGC\n+>49937\n+TGAGTCCCACAATACTGTATATA\n+>49938\n+TAAGACTATAATTGATGGAATGAACT\n+>49939\n+TGTTATACTCAGATACAGACGGTTCGA\n+>49940\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49941\n+TCTGAGTTTATTTTTAAAAGGTGTT\n+>49942\n+TCAGGTACTTAGTGACTCTCA\n+>49943\n+TCAGGATTGCTGAGCTGTGCGGTA\n+>49944\n+GGGGATGTAGCTCAGTGGA\n+>49945\n+AATATCATCTATTCTCGGTAGTGGA\n+>49946\n+GGTTCCATGGTGTAATGG\n+>49947\n+TGCGAGGTCAATGGTTGTAAAGTA\n+>49948\n+TCTTTGTAGTCGGTTGGCTGTACAGGT\n+>49949\n+GGCACTTGAACTTGGCACTGGACGC\n+>49950\n+GGGGACGTAGCTCAGTGGGG\n+>49951\n+TGCATAAGATATTCCGCCTCGAGAAGA\n+>49952\n+TGGACGGAGAACTGATAAGGG\n+>49953\n+TATTAACCAGACTGCAAATATACT\n+>49954\n+AACAATTAGGACCACGGCGATGGT\n+>49955\n+TTGGTGTAATCTTGGATCGGAGAC\n+>49956\n+CTAACTGTGAAAGGGGAATTGACCGGC\n+>49957\n+TGTAAAAGGGTGTCTCACTGCGGC\n+>49958\n+TTCTTATGGATTCTGAGTGATGTG\n+>49959\n+TTACAAAGTTGTAGATTGGTCGGGG\n+>49960\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49961\n+TAAATTGATTTAGTTTGAATTTAGA\n+>49962\n+TCTTTGGTATTCTAGCTGTAG\n+>49963\n+TAATACTGTCAGGTAAAGATGTCA\n+>49964\n+TATGCCGTAAGCTTGTTGCGCGCGGA\n+>49965\n+ACTTTCGGACTGATTGTTATGGTGG\n+>49966\n+TAATTGAGTACAGTTGGACATAGC\n+>49967\n+GCGGAGGTTGCGGTGCTGTACACTG\n+>49968\n+TTTCAGTTCGGGTAGGGTGGG\n+>49969\n+TGAGTACAAGCCCTTCTGGCGTGAAGA\n+>49970\n+TTTTGGAGCCACTTTCGCCTCGTAGGA\n+>49971\n+TCTATCCAAAGAGCTGATTGTCATACT\n+>49972\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49973\n+TTTGTGCAAATCAATATAAATTGA\n+>49974\n+TCGAATTGCTGAATGCCGAAGTAAAATA\n+>49975\n+AGATATGTTTGATATTCTTGGTTG\n+>49976\n+TGCTTGGACTACATATGGTTGAGGGTTGTA\n+>49977\n+TCCCCCCCTAAGATTATGACCGTC\n+>49978\n+TTGCAGACGGACTTTTGAATTCACAGG\n+>49979\n+TGCTTGGACTACATATGGTTGAGGG\n+>49980\n+TGAGGTAGTAGGTTGTATAG\n+>49981\n+TGACGATGCCGCTGTAGAGCTTG\n+>49982\n+TGTTGTGTTGCGACAGTAGAGCGC\n+>49983\n+TACATGGCGACTTTCTTGCAACTGAACT\n+>49984\n+TGCAGGAAGCTTTCAAGGCAACAGA\n+>49985\n+TTAGGACGAGATTCGCTAATGCAAT\n+>49986\n+TAACATATGTGCAAGTTATTGGGA\n+>49987\n+TGTGATGTGACGTAGTG
GAA\n+>49988\n+AACTTCTTCTATCTTTGTGCGGGA\n+>49989\n+TATTTGGGTCACCGGGTTAAGTAGCGC\n+>49990\n+ATATTGTCTAGTGGTTAGG\n+>49991\n+TGAGGTAGTAGGTTGTATAGT\n+>49992\n+TCTTGGACTGAGCAGCTACTGTTTG\n+>49993\n+GCTCTCTTGAGTGGATTGCGCATGGA\n+>49994\n+TGATCTGGGGTGCATGGTAATCGG\n+>49995\n+AATGGCACTGGAAGAATTCAC\n+>49996\n+CGGGAAACTATGGATCAAATG\n+>49997\n+ATCTGCCTGAGTCGACTGTTCCGTAA\n+>49998\n+TTTGAGCAGCGAATCTGGAACGGT\n+>49999\n+TCGAAGACTAGACGGATTTTTCCCGGCT\n+>50000\n+TATTTAGAAAAACAGGTGAGTGA\n'
b
diff -r 000000000000 -r e7e7785e41d0 test-data/sample1_output.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample1_output.tab Mon Sep 11 18:27:40 2017 -0400
b
@@ -0,0 +1,8 @@
+2L-tail.fa Matched 2954
+
+dme_miR21_hairpin.fa Matched 0
+
+Ensembl_transposon_set.fa Matched 2689
+
+Unmatched to previous indexes 265
+
b
diff -r 000000000000 -r e7e7785e41d0 test-data/sample_output.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample_output.tab Mon Sep 11 18:27:40 2017 -0400
b
@@ -0,0 +1,8 @@
+2L-tail.fa Matched 638
+
+dme_miR21_hairpin.fa Matched 0
+
+Ensembl_transposon_set.fa Matched 560
+
+Unmatched to previous indexes 78
+
b
diff -r 000000000000 -r e7e7785e41d0 tool-data/bowtie_indices.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/bowtie_indices.loc.sample Mon Sep 11 18:27:40 2017 -0400
b
@@ -0,0 +1,37 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Bowtie-indexed sequence data files. You will
+#need to create these data files and then create a bowtie_indices.loc
+#file similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The bowtie_indices.loc
+#file has this format (the wider gaps between fields are TAB characters):
+#
+#<unique_build_id>   <dbkey>   <display_name>   <file_base_path>
+#
+#So, for example, if you had the hg18 index files stored in
+#/depot/data2/galaxy/bowtie/hg18/,
+#then the bowtie_indices.loc entry would look like this:
+#
+#hg18 hg18 hg18 /depot/data2/galaxy/bowtie/hg18/hg18
+#
+#and your /depot/data2/galaxy/bowtie/hg18/ directory
+#would contain hg18.*.ebwt files:
+#
+#-rw-r--r--  1 james    universe 830134 2005-09-13 10:12 hg18.1.ebwt
+#-rw-r--r--  1 james    universe 527388 2005-09-13 10:12 hg18.2.ebwt
+#-rw-r--r--  1 james    universe 269808 2005-09-13 10:12 hg18.3.ebwt
+#...etc...
+#
+#Your bowtie_indices.loc file should include an entry per line for each
+#index set you have stored. The "file" in the path does not actually
+#exist, but it is the prefix for the actual index files. For example:
+#
+#hg18canon  hg18 hg18 Canonical /depot/data2/galaxy/bowtie/hg18/hg18canon
+#hg18full  hg18 hg18 Full  /depot/data2/galaxy/bowtie/hg18/hg18full
+#/orig/path/hg19 hg19 hg19  /depot/data2/galaxy/bowtie/hg19/hg19
+#...etc...
+#
+#Note that for backwards compatibility with workflows, the unique ID of
+#an entry must be the path that was in the original loc file, because that
+#is the value stored in the workflow for that parameter. That is why the
+#hg19 entry above looks odd. New genomes can be better-looking.
+#
b
diff -r 000000000000 -r e7e7785e41d0 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Mon Sep 11 18:27:40 2017 -0400
b
@@ -0,0 +1,8 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
+<tables>
+    <!-- Locations of indexes in the Bowtie mapper format -->
+    <table name="bowtie_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/bowtie_indices.loc" />
+    </table>
+</tables>