Repository 'mirdeep2'
hg clone https://toolshed.g2.bx.psu.edu/repos/rnateam/mirdeep2

Changeset 0:eaac585f172a (2015-01-27)
Next changeset 1:b21be04f52e4 (2015-02-04)
Commit message:
Imported from capsule None
added:
mirdeep2.xml
test-data/cel_cluster.fa
test-data/mature_ref_other_species.fa
test-data/mature_ref_this_species.fa
test-data/output.mrd
test-data/precursors_ref_this_species.fa
test-data/reads_collapsed.fa
test-data/reads_collapsed_vs_genome.arf
test-data/result.bed
test-data/result.csv
test-data/survey.csv
tool_dependencies.xml
b
diff -r 000000000000 -r eaac585f172a mirdeep2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mirdeep2.xml Tue Jan 27 09:06:24 2015 -0500
[
b'@@ -0,0 +1,196 @@\n+<tool id="rbc_mirdeep2" name="MiRDeep2" version="2.0.0">\n+    <description>\n+<![CDATA[\n+identification of novel and known miRNAs\n+]]>\n+    </description>\n+    <requirements>\n+        <requirement type="package" version="2.0">mirdeep2</requirement>\n+        <requirement type="package" version="2.0">mirdeep2_quantifier</requirement>\n+        <requirement type="package" version="0.12.7">bowtie</requirement>\n+        <requirement type="package" version="5.18.1">perl</requirement>\n+        <requirement type="package" version="1.8.5">vienna_rna</requirement>\n+        <requirement type="package" version="2.023">pdf_api2</requirement>\n+        <requirement type="package" version="2.0">randfold</requirement>\n+    </requirements>\n+\n+    <command>\n+<![CDATA[\n+    miRDeep2.pl\n+    \n+    $reads\n+    $genome\n+    $mappings\n+    \n+    #if $mature_this\n+        $mature_this\n+    #else\n+        none\n+    #end if\n+    \n+    #if $mature_other\n+        $mature_other\n+    #else\n+        none\n+    #end if\n+    \n+    #if $precursors\n+        $precursors\n+    #else\n+        none\n+    #end if\n+    \n+    #if $species.value != \'all\'\n+        -t $species\n+    #end if\n+    \n+    #if $star_sequences\n+        -s $star_sequences\n+    #end if\n+    \n+    #if $min_read_stack\n+        -a $min_read_stack\n+    #end if\n+    \n+    #if $min_read_stack\n+        -a $min_read_stack\n+    #end if\n+    \n+    -g $max_precursors_analyze\n+    -b $min_score_cutoff\n+    $disable_randfold\n+    \n+    ; cp result*.bed result.bed 2> /dev/null\n+    ; cp result*.csv result.csv 2> /dev/null\n+    ; cp mirdeep_runs/run*/output.mrd . 2> /dev/null\n+    ; cp mirdeep_runs/run*/survey.csv . 
2> /dev/null\n+    \n+    ## html output\n+    ;\n+    cp result*.html $html 2> /dev/null\n+    \n+    ## move pdf directory to be accessible from the new index.html\n+    ;\n+    mkdir -p $html.files_path 2> /dev/null\n+    ;\n+    cp -R pdfs* $html.files_path 2> /dev/null\n+    \n+]]>\n+    </command>\n+    <stdio>\n+        <!-- Anything other than zero is an error -->\n+        <exit_code range="1:" />\n+        <exit_code range=":-1" />\n+        <!-- In case the return code has not been set propery check stderr too -->\n+        <regex match="Error:" />\n+        <regex match="Exception:" />\n+    </stdio>\n+    <inputs> \n+        <param name="reads" format="fasta" type="data" label="Collapsed deep sequencing reads">\n+            <help>\n+<![CDATA[\n+Reads in fasta format. The identifier should contain a prefix, a running\n+number and a \'_x\' to indicate the number of reads that have this sequence.\n+There should be no redundancy in the sequences.\n+]]>\n+            </help>\n+        </param>\n+        <param name="genome" format="fasta" type="data" label="Genome" help="Genome contigs in fasta format. The identifiers should be unique."/>\n+        <param name="mappings" format="tabular" type="data" label="Mappings" help="Reads mapped against genome. Mappings should be in ARF format."/>\n+        <param name="mature_this" optional="true" format="fasta" type="data" label="Mature miRNA sequences for this species" \n+            help="miRBase miRNA sequences in fasta format. These should be the known mature sequences for the species being analyzed."/>\n+        <param name="mature_other" optional="true" format="fasta" type="data" label="Mature miRNA sequences for related species">\n+            <help>\n+<![CDATA[\n+miRBase miRNA sequences in fasta format. 
These should be the pooled known\n+mature sequences for 1-5 species closely related to the species being analyzed.\n+]]>\n+            </help>\n+        </param>\n+        <param name="precursors" optional="true" format="fasta" type="data" label="Precursor sequences" \n+            help="miRBase miRNA precursor sequences in fasta format. These should be the known precursor sequences for the species being analyzed."/>\n+            \n+        <param name="species" type="select" label="Search in species" help="If not searching in a specific species all species in your files will'..b'        <option value="cfa">dog</option>\n+            <option value="fru">fugu</option>\n+            <option value="bta">cow</option>\n+            <option value="der">d.erecta</option>\n+            <option value="dgr">d.grimshawi</option>\n+            <option value="gga">chicken</option>\n+            <option value="spu">s.purpuratus</option>\n+            <option value="bfl">lancelet</option>\n+            <option value="ptr">chimp</option>\n+            <option value="dse">d.sechellia</option>\n+            <option value="dpe">d.persimilis</option>\n+            <option value="dvi">d.virilis</option>\n+            <option value="rno">rat</option>\n+            <option value="dme">d.melanogaster</option>\n+            <option value="lca">cat</option>\n+            <option value="sja">c.japonica</option>\n+            <option value="dan">d.ananassae</option>\n+            <option value="hsa">human</option>\n+            <option value="dsi">d.simulans</option>\n+        </param>\n+        <param name="star_sequences" format="fasta" type="data" optional="true" label="Star sequences" help="From miRBase in fasta format (optional) (-s)"/>\n+        \n+        <param name="min_read_stack" optional="true" type="integer" minvalue="0" label="Minimum read stack height">\n+            <help>\n+<![CDATA[\n+minimum read stack height that triggers analysis. 
Using this option disables\n+automatic estimation of the optimal value and all detected precursors are analyzed. (-a)\n+]]>\n+            </help>\n+        </param>\n+        <param name="max_precursors_analyze" type="integer" value="50000" label="Maximum precursors" \n+            help="Maximum number of precursors to analyze when automatic excision gearing is used. If set to -1 all precursors will be analyzed. (-g)."/>\n+        <param name="min_score_cutoff" type="integer" value="0" label="Minimum miRNA score" \n+            help="Minimum score cut-off for predicted novel miRNAs to be displayed in the overview table. (-b)"/>\n+        <param name="disable_randfold" type="boolean" truevalue="-c" falsevalue="" label="Disable randfold analysis" help="(-c)"/>\n+    </inputs>\n+    <outputs>\n+        <data name="tab_results" format="tabular" from_work_dir="result.csv" label="Tabular output of ${tool.name} on ${on_string}"/>\n+        <data format="html" name="html" label="${tool.name} on ${on_string} (html report)"/>\n+        <data name="pred_acc" format="tabular" from_work_dir="survey.csv" label="Prediction accuracy output of ${tool.name} on ${on_string}"/>\n+        <data name="bed_out" format="bed" from_work_dir="result.bed" label="Bed output of ${tool.name} on ${on_string}"/>\n+        <data name="mrd_out" format="txt" from_work_dir="output.mrd" label="Text output of ${tool.name} on ${on_string}"/>\n+    </outputs>\n+    <tests>\n+        <test>\n+            <param name="reads" value="reads_collapsed.fa"/>\n+            <param name="genome" value="cel_cluster.fa"/>\n+            <param name="mappings" value="reads_collapsed_vs_genome.arf"/>\n+            <param name="mature_this" value="mature_ref_this_species.fa"/>\n+            <param name="mature_other" value="mature_ref_other_species.fa"/>\n+            <param name="precursors" value="precursors_ref_this_species.fa"/>\n+            \n+            <output name="tab_results" file="result.csv" 
compare="sim_size"/>\n+            <output name="prec_acc" file="survey.csv" compare="sim_size"/>\n+            <output name="bed_out" file="result.bed" compare="sim_size"/>\n+            <output name="mrd_out" file="output.mrd" compare="sim_size"/>\n+        </test>\n+    </tests>\n+    <help>\n+<![CDATA[\n+**What MiRDeep2 does**\n+\n+MiRDeep2 is a software package for identification of novel and known miRNAs in deep sequencing data. Furthermore, it can be used for miRNA expression profiling across samples.\n+]]>\n+    </help>\n+    <citations>\n+        <citation type="doi">10.1093/nar/gkr688</citation>\n+        <citation type="doi">10.1002/0471250953.bi1210s36</citation>\n+    </citations>\n+</tool>\n'
b
diff -r 000000000000 -r eaac585f172a test-data/cel_cluster.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cel_cluster.fa Tue Jan 27 09:06:24 2015 -0500
b
@@ -0,0 +1,132 @@
+>chrII:11534525-11540624
+tccaaaatattagaggccccacgaaaaggggagcagaacgaaaaggggat
+ctgcaaaaaggggatctgcgaaaaggggagatacgaaaaggggagatacg
+aaaaggggagcaacgaaaaggggatctggcactgtgccaaacgctatttt
+tctcaaagaaaacaatacaacgatgctccgatgttacgcgtcgcgtgttg
+ttaagcgtatcttctagaagaaaatttcaaaaatcaacgtgcgttgcgtg
+agaaaaattgcgttttgtgcgtttggcacagtgccagctccccttttcgt
+tgctccccttttcgtatctccccttttcgtatctccccttttcgcagatc
+ccctttttgcagatccccttttcgttctgctcccctcgtggggcctcaaa
+taatattgtcaatggagcgcagttgcgcacaccgaaatacgcacgaaatt
+tgaaattggcaggcctagaaagtctggatttaaaatatttatgcatatta
+acctgagtttcggcatgtttagataaatttagattaaaatttattaaata
+aaaatatttaaaatttattaaattcaacattatctttaattttacagaaa
+taaaaaccaaaaaaccaacgaagtgtaacttttcgacaataaatatttat
+aaattattcaattaattggaaatctatttatcacatctcatcgtgaacaa
+gcacatggctaaacaatgcctcgactttatccacccatcctggatttttc
+ctgttgaaaatcttcaaatccttcatagtctgctcaattgccttaacagc
+cttttcccttttcacatccttcgtcaattcctcaaccatcgcctctacaa
+gttgtgatcgtgaggccccttcaacgaatttcttgtccaatacatctctg
+agctgaaaaataagtccctttattcatgtgcaccacctgccactcattca
+ttttagatcctaatcctgtatttctctctctctctcactaatcgtcccgt
+catatctgacccactataatgaggggaccaccactttgccccagatttcg
+taatgagccaacatacactaggccctgttctagcacccccattgtgcatg
+ggagacacatcgtctgttggttcgggtcagctgagatgacgtggcagatg
+acgtcatgtttgttctctagcagatccctatcgtaaaataccgttctttg
+ttcgtttatgagcacttcttgggaaacaagaagaatattagagaatattg
+gattttaggaaatttcttgaccctaataatatgtatctcatggggctaat
+tacgaaagatttagagctcactaaagcagatgtggataagctggaactat
+agtcaattctcacgagatattattattagcttactcgttgaatttgatgc
+cgtgttaggtagaagccgagaacacgatgaaccttggaagttatgtagtc
+gagttttctggaagaaaaagtatatatagtttgataaaaagttacgtcat
+aagttattggtcccgagtgtgtacactagaaacttgcgcaatcagttggt
+caggtggtctaggacctttacatgtaatataattcattggtttgttgctc
+tagatctcaaagaatatggttttcctgctgagaaacaaaatttttttttc
+aaatgttcgcgatattatacgatttttttgcgctgaaaaatacggtatcc
+ggactcgacacgacagtttttcaataaattcggaaaggtacatatgcgcc
+tttgaagagtactgtaattttaaacatttgttgatgcagtaaatattctc
+acggatttttagaggtttttttttgaatatttatacagttatattagagt
+taaaaaaataaatctatttttatcgaaatttatgaaaaatctactcccca
+gaaacaaagtttgaatttacttaaaatgtatgtgcgcctttttaaattca
+acaaaaatttatcaaattttgtctttttaatcacttttacttttttttcg
+aaaaaaaaaaaaactttaaaaacaccgaaaattaaaatttttaattaccg
+taatcctttaaaggcgcacacctctttttcgcaatcataaaaatttacag
+gcacagaatttcctactgtaacgtgttttctcgatagaaacgtttctcca
+aatcgaaatttttattagcaatcatcaattcgtcatgtttctgtgctgga
+gcacattgttatgcagtgctcatgaatattcacagttgaaaatcgaattt
+ttttttcttatttttattaacttctctcctcactcaagtgcaatctcatt
+tcggcggaacgatgaaagcggactcgtttgaaacggattacactgctggc
+tggctccatcttttttatttctggtttttttcccgacacgggaggtcatc
+cgtccacgtcttcaatcttattcgcttctccttctatctatctctcttcc
+aaaaaaatggaacagattgtccgccttctactttccgcatgcgcctattc
+tagacatcgagaagaagcgagagaatccgaaaaactccgccgtccggcgc
+ctagcgttcatcccgtcgcatccaatttccttcacccctgtctccagtct
+ctaatctgtgtttcccagaccttctccttcttcttctcccctctttcttc
+cctttcttttctacgattcctagacctgcccggttgaccatcgggcccac
+catttatcgagcgcgtccgtctgttcgttgctgtgagcgcgcgcgcgcga
+gcgcacactgcacaccatttgaccgcaacgagccgagagagtgtgtgaaa
+tgaagagacgccgagtggacacgttgctctacaccaaacgcgctacacca
+aatgggcggagacccaatggtgcaccaagtcagcagtataaaaggaatgg
+aaatggtccattcagtcatcagttgtttttttattttctcttttctccca
+tcactttctcccttcttcccctctcctaatttccattcccaactattatt
+ctcggatcagatcgagccattgctggtttcttccacagtggtactttcca
+ttagaactatcaccgggtggaaactagcagtggctcgatcttttccactt
+gctccaccgctgtcggggaaccgcgccaattttcgcttcagtgctagacc
+atccaaagtgtctatcaccgggtgaaaattcgcatgggtccccgacgcgg
+aaagataaaatatcttaaaatcgattctagaaacccttggaccagtgtgg
+gtgtccgttgcggtgctacattctctaatctgtatcaccgggtgaacact
+tgcagtggtcctcgtggtttctctgtgagccaggtcctgttccggttttt
+tccgtggtgataacgcatccaaaagtctctatcaccgggagaaaaactgg
+agtaggacctgtgactcattccgatttctggagttttcccctacattcta
+ccattcagctactgattattgaattcctgatataccgagagcccagctga
+tttcgtcttggtaataagctcgtcattgagattatcaccgggtgtaaatc
+agcttggctctggtgtctccgaacctcctgtccgcacctcagtggatgta
+tgccatgatgataagatatcagaaatcctatcaccgggtgtacatcagct
+aaggtgcgggtacaggtgcatttgatatcaaggtgagttatttttaagtg
+ggtcccagagaccttggtggtttttctctgcagtgatagatacttctaac
+aactcgctatcaccgggtgaaaaatcacctaggtctggagcctcctgctc
+tttttttgctgtttttaaacttaaacttaaaactaacactttatcaaaat
+aaagtcttccaaaatcttttttctgatctgccggaagactttccatgaat
+ttgtccaaaactcgttgcactttatcctcgtcgtaatcatttgacaccat
+atctttggataactagaataattgaacattggtttaaaaaaattctctca
+aaaaaaaaatcacaaacctgatgaattttagcctcatctatagcagtatc
+tgcgagaagaatcgtaagattcattgataattctatatattttccaataa
+ttttatccgaagctggatgtgaagtttctacagtacctccgtggcataat
+actacaatggaagcaaaaagagcgacaattaggatcgtcattttagtgga
+gaatgatgaattgacccggctctttgggggttggaggattcattttcccc
+ttcaacaggtgctcaaataattaaagagaatagaaccgacgatatatggg
+tcaaatgtttatacagttgtgctactaaacgtattgttacacgcgttatt
+aataattagaattagaattatatcgttttgtacggattgagaatcccatt
+tgggtcgaaaatgttcttgagcttctttgtcaaaagtcgttcttcaggat
+cttttccgaatgtggagtacggtagcttcagttgaccgattccatgttca
+gctgaaattgacccgccgtgatcgacaacccattcgtagaggaagggata
+gagcctggaatttgaaattattaatagaattaaaattttataaaaaagaa
+aaatctcgaaacttacaatttttcaagttcttcattgtgtttttcagatg
+taatatttaaatgagtgtttccatctcccaaatgtccataagtgacaatt
+cttttagccaaagaaccacatcgttccttcatcacattcgtcaattcata
+ataattttccaatggtaaagaaacatcatgcttatagacatatccatctc
+tggtaaccgccaatggtgcactttctctcaattgccacatcttcgtagct
+tctgcagaagatccagcgagcactccgtcgataatcagattttttgagag
+gcattcatccaaaaatgcgctcattttctccatatcgtgatcttcattgg
+atcctgatgtttcgacaaggatcgagaatggggttggagcattcagtact
+gggtgcaatcctagattcgttttcaaacattccatcgtagcatcgtcgag
+aagttcgaaggaggaaaggatttcagtgaggctggattttgcaagcttca
+gtacttcgcagcattttttgaaggattcgatgcctgaaaaaaattagaaa
+ttgaaaaatttgaacccattatataaatattcaacctaccaagcatcgca
+ctttgtacacttttcggtttaggaaccgcagtcattgtgacgcttgtgat
+aacacccaactgcccttcacttcccaagaataggtgtggagtgtgcaaag
+ttgtgttgtccttccgaatgcttgatcccaaatgtagaacagttccatgt
+tcatcgggaagaaccaccgtgagcccgagaagatgtgcgtgaagacttcc
+gtatcgaatcaaccgaatccctcctgcacacgtggcaatatttcctccaa
+tctgacaggatcctttggctccgagatcaaaaggcatcatatatccgagt
+tttgcaagtttgttgtccaagtcttccagaataaaaccagaatcacactt
+gagaattcccattgtgtcgtcgaaagagaattgcttgttgattttattca
+ttgatataacaacttcatcatgtactggaattgatccaccaacgagtcct
+gtattccctccttgtgggactactgccaatttgttcttggagcaatacgc
+caatattgcagaaacctcttcggtgctcttcgggtagaggacgacactgc
+caggacctggaaattaaaacatttaatatttaaagtacctatttaatatt
+aaataaacctaatcttaaaatacaaatacctttaaattggcctgtccaat
+ctgtagtatgatttgtaatatcgtccttcttcactgcatcttgcccgaga
+aaattctcaaatgccattaaatccgactgcatcacttttgcaaatccttc
+atgtctggcggcaagaactgctgcgtagcttctagttggtcttctgattc
+gtggcaagattttcagaaacatctgaattcagaaacttttcaaaaaataa
+cttgaaacttcaaaagaaatagtaggagacaaagctcggctaggtgaccg
+
+>chrIII:2172325-2172669
+CAGGCAGTCAGTCAGTCTCTCTCCACTCACATTCAGTCAGTCAGTGACCA
+TCACCAGGTTGTGTGTGAGCCCCTTCCATTATTCTTCTGAACCCCCCTCC
+CAACGCGGGAAGACTCGCCGGCAATGACACTGGTTATCTTTTCCATCGTG
+GAATGCCCCCCATTGATTTTTTCCCCTTTTCGGGGGGAAAAAATTGGAAA
+CGAGAAAGGTATCGGGTGTCATAGCCGGCGTGATCATCTTCCTCAAGTAT
+TCCTCTGCTGATTTCGATATTCGGAGGAGCTCGGAGGCTGTTCAGCTGTA
+AAATTGGTTGATTAAGCTCAAAAATCGGCGTTAACGCGGCTTTCC
b
diff -r 000000000000 -r eaac585f172a test-data/mature_ref_other_species.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mature_ref_other_species.fa Tue Jan 27 09:06:24 2015 -0500
b
b'@@ -0,0 +1,490 @@\n+>cbr-let-7\n+TGAGGTAGTAGGTTGTATAGTT\n+>cbr-lin-4\n+TCCCTGAGACCTCAAGTGTGA\n+>cbr-miR-1\n+TGGAATGTAAAGAAGTATGTA\n+>cbr-miR-34\n+AGGCAGTGTGGTTAGCTGGTTG\n+>cbr-miR-42\n+CACCGGGTTAACATCTACAG\n+>cbr-miR-43\n+TATCACAGTTTACTTGCTGTCGC\n+>cbr-miR-44\n+TGACTAGAGACACATTCAGCT\n+>cbr-miR-45\n+TGACTAGAGACACATTCAGCT\n+>cbr-miR-46\n+TGTCATGGAGTCGCTCTCTTCA\n+>cbr-miR-47\n+TGTCATGGAGGCGCTCTCTTCA\n+>cbr-miR-48\n+TGAGGTAGGCTCAGTAGATGCGA\n+>cbr-miR-49\n+AAGCACCACGAGAAGCTGCAGA\n+>cbr-miR-50\n+TGATATGTCTGATATTCTTGGGTT\n+>cbr-miR-52\n+CACCCGTACATATGTTTCCGTGCT\n+>cbr-miR-57\n+TACCCTGTAGATCGAGCTGTGTGT\n+>cbr-miR-58\n+TGAGATCGTTCAGTACGGCAAT\n+>cbr-miR-60\n+TATTATGCACATTTTCTAGTCCA\n+>cbr-miR-67\n+TCACAACCTCCTAGAAAGAGTAGA\n+>cbr-miR-71\n+TGAAAGACATGGGTAGTGA\n+>cbr-miR-73\n+TGGCAAGATGTTGGCAGTTCAGT\n+>cbr-miR-74\n+TGGCAAGAAATGGCAGTCTAGA\n+>cbr-miR-75\n+TTAAAGCTACCAACCGCCTTCA\n+>cbr-miR-77\n+TTCATCAGGCCATAGCTGTCCA\n+>cbr-miR-79\n+ATAAAGCTAGGTTACCAAAGCT\n+>cbr-miR-80\n+TGAGATCATTAGTTGAAAGCCGA\n+>cbr-miR-81\n+TGAGATCATCGTGAAAGCTAGT\n+>cbr-miR-82\n+TGAGATCATCGTGAAAGCCAGT\n+>cbr-miR-85\n+TACAAAGTATTTGAAAAGGCGTGC\n+>cbr-miR-86\n+TAAGTGAATGCTTTGCCACAGTC\n+>cbr-miR-87\n+GTGAGCAAAGTTTCAGGTGT\n+>cbr-miR-90\n+TGATATGTTGTTTGAATGCCCC\n+>cbr-miR-124\n+TAAGGCACGCGGTGAATGCCA\n+>cbr-miR-228\n+AATGGCACTGCATGAATTCACGG\n+>cbr-miR-230\n+GTATTAGTTGTGCGACCAGGAAA\n+>cbr-miR-232\n+TAAATGCATCTTAACTGCGGTGA\n+>cbr-miR-233\n+TTGAGCAATGCGCATGTGCGGGA\n+>cbr-miR-234\n+TTATTGCTCGAGAATACCCTT\n+>cbr-miR-236\n+TAATACTGTCAGGTAATGACGCT\n+>cbr-miR-241\n+TGAGGTAGGTGTGAGAAATGA\n+>cbr-miR-244\n+TCTTTGGTTGTACAAAGTGGTATG\n+>cbr-miR-245\n+ATTGGTCCCCTCCAAGTAGCTC\n+>cbr-miR-248\n+TACACGTGCTCGGATAACGCTCA\n+>cbr-miR-250\n+TCACAGTCAACTGTTGGCACGG\n+>cbr-miR-251\n+TTAAGTAGTGGTGCCGCTCTTATT\n+>cbr-miR-252\n+TAAGTAGTAGTGCCGCAGGTAAC\n+>cbr-miR-259\n+AAATCTCATCCTAATCTGGTT\n+>cbr-miR-268\n+GGCAAGAATTAGAAGCAGTTTTGGT\n+>cbr-miR-72\n+AGGCAGATGTTGGCATAGC\n+>cbr-lsy-6\n+TTTTGTATGAGACGCATTCCG\n+>cbr-miR-353\n+CAAGTATCATGTGTTGGTATC\n+>cbr-
miR-64\n+CATGACACTGAAGCGTGTACGGA\n+>cbr-miR-231\n+TAAGCTCGTGAACAACAGGCAGGA\n+>cbr-miR-356\n+ATGAGCAACGCGAACAAATCC\n+>cbr-miR-83\n+TAGCACCATATAAATTCAGTGT\n+>cbr-miR-246\n+TTACATGTATTGGGTAGGAGCT\n+>cbr-miR-51\n+TACCCGTAGCTCCTTGCCATGTT\n+>cbr-miR-357\n+AAAATGCCAGTCATTGACGGA\n+>cbr-miR-253\n+CACACCTCACTAACACTAACT\n+>cbr-miR-70\n+TAATACGTGATTGGTGTTCCCAG\n+>cbr-miR-358\n+CAATTGGTATCCTTAGTCGTGG\n+>cbr-miR-61\n+TGACTAGAACCTTGACTCTGCTC\n+>cbr-miR-360\n+TGACCGTAATCCCGTTCACAA\n+>cbr-miR-239a\n+TTTGTACTACAATTAGGTACTGG\n+>cbr-miR-249\n+TCACAGGATTTTTGAGTGTTGC\n+>cbr-miR-240\n+TACTGGCCTCCAAATTTTCGCT\n+>cbr-miR-254\n+TGCAAATCTTTTGCAACTGTATA\n+>cbr-miR-239b\n+TTGTACTGCACAAAAGTACTG\n+>cbr-miR-62\n+TGATATGTAATCTAGCTTACAG\n+>cbr-miR-55\n+TACCCGTATATTTTTCTGCCGAG\n+>cbr-miR-84\n+TGAGGTAGTTTGCAATGCTGTC\n+>cbr-miR-354\n+ACCTTGTTTGTTGCTGCTCCT\n+>cbr-miR-35\n+TCACCGGGTGAAAACTTGCAAG\n+>cbr-miR-36\n+TCACCGGGTGAAAATTCGCAAT\n+>cbr-miR-38\n+TCACCGGGAGACAACCTGGTAT\n+>cbr-miR-39\n+TCACCGGGTGAAAAACGGTTAG\n+>cbr-miR-40\n+TCACCGGGTGTCAATCAGCTAG\n+>cbr-miR-41\n+TCACCGGGTGAAAAACTCCCA\n+>cbr-miR-355\n+TTTGTTTTAGCCTGAGCTATG\n+>cbr-miR-784\n+TGGCACAATACCTGTATGTAGA\n+>cbr-miR-785\n+TAAGTGAATACTCTGTGTTGA\n+>cbr-miR-786\n+TAATGCCCTGTACGAGATTTGGT\n+>cbr-miR-787\n+TAAGCTCGTCTTAGTTTTCCTCT\n+>cbr-miR-788\n+TCCGCTTCTCAATGCTCCATTTGCAA\n+>cbr-miR-789a\n+TCCCTGCCTGGGTCAAATGTTTT\n+>cbr-miR-789b\n+GCCCTGCCTGGGTCACCATGTGA\n+>cbr-miR-790\n+CTTGGCACTCGCGAACACCGCG\n+>cbr-miR-791\n+ATTGGCACTCCGCTGATTTGGTG\n+>cbr-miR-792\n+TTGAAATTTTTTCTATTTTCGGT\n+>cbr-miR-235\n+TATTGCACTTTCCCTGGCCAGA\n+>cbr-miR-242\n+TTGCGTAGGCCTTTGTTTCGA\n+>cbr-miR-255\n+AAACTGAAGAGATTTTTTACAG\n+>cbr-miR-359\n+TCACTGGTTATCCTCTGTCGAA\n+>cbr-miR-392\n+TATCATCGATCATGTGAGCTGT\n+>dme-miR-1\n+TGGAATGTAAAGAAGTATGGAG\n+>dme-miR-2a\n+TATCACAGCCAGCTTTGATGAGC\n+>dme-miR-2b\n+TATCACAGCCAGCTTTGAGGAGC\n+>dme-miR-3\n+TCACTGGGCAAAGTGTGTCTCA\n+>dme-miR-4\n+ATAAAGCTAGACAACCATTGA\n+>dme-miR-5\n+AAAGGAACGATCGTTGTGATATG\n+>dme-miR-6\n+TATCACAGTGGCTGTTCTTTTT\n+>dme-miR-7\n+TGG
AAGACTAGTGATTTTGTTGT\n+>dme-miR-8\n+TAATACTGTCAGGTAAAGATGTC\n+>dme-miR-9a\n+TCTTTGGTTATCTAGCTGTATGA\n+>dme-miR-10\n+CAAATTCGGTTCTAGAGAGGTTT\n+>dme-miR-11\n+CATCACAGTCTGAGTTCTTGC\n+>dme-miR-12\n+TGAGTATTACATCAGGTAC'..b'303\n+TTTAGGTTTCACAGGAAACTGGT\n+>dme-miR-31b\n+TGGCAAGATGTCGGAATAGCTG\n+>dme-miR-304\n+TAATCTCAATTTGTAAATGTGAG\n+>dme-miR-305\n+ATTGTACTTCATCAGGTGCTCTG\n+>dme-miR-9c\n+TCTTTGGTATTCTAGCTGTAGA\n+>dme-miR-306\n+TCAGGTACTTAGTGACTCTCAA\n+>dme-miR-9b\n+TCTTTGGTGATTTTAGCTGTATG\n+>dme-let-7\n+TGAGGTAGTAGGTTGTATAGT\n+>dme-miR-125\n+TCCCTGAGACCCTAACTTGTGA\n+>dme-miR-307\n+TCACAACCTCCTTGAGTGAG\n+>dme-miR-308\n+AATCACAGGATTATACTGTGAG\n+>dme-miR-31a\n+TGGCAAGATGTCGGCATAGCTGA\n+>dme-miR-309\n+GCACTGGGTAAAGTTTGTCCTA\n+>dme-miR-310\n+TATTGCACACTTCCCGGCCTTT\n+>dme-miR-311\n+TATTGCACATTCACCGGCCTGA\n+>dme-miR-312\n+TATTGCACTTGAGACGGCCTGA\n+>dme-miR-313\n+TATTGCACTTTTCACAGCCCGA\n+>dme-miR-314\n+TATTCGAGCCAATAAGTTCGG\n+>dme-miR-315\n+TTTTGATTGTTGCTCAGAAAGC\n+>dme-miR-316\n+TGTCTTTTTCCGCTTACTGGCG\n+>dme-miR-317\n+TGAACACAGCTGGTGGTATCCAGT\n+>dme-miR-318\n+TCACTGGGCTTTGTTTATCTCA\n+>dme-miR-2c\n+TATCACAGCCAGCTTTGATGGGC\n+>dme-miR-iab-4-5p\n+ACGTATACTGAATGTATCCTGA\n+>dme-miR-iab-4-3p\n+CGGTATACCTTCAGTATACGTAAC\n+>dme-miR-iab-4as-5p\n+TTACGTATACTGAAGGTATACCG\n+>dme-miR-iab-4as-3p\n+GGATACATTCAGTATACGTTTA\n+>dme-miR-954\n+TCTGGGTGTTGCGTTGTGTGT\n+>dme-miR-955\n+CATCGTGCAGAGGTTTGAGTGTC\n+>dme-miR-190\n+AGATATGTTTGATATTCTTGGTTG\n+>dme-miR-193\n+TACTGGCCTACTAAGTCCCAAC\n+>dme-miR-956\n+TTTCGAGACCACTCTAATCCATT\n+>dme-miR-957\n+TGAAACCGTCCAAAACTGAGGC\n+>dme-miR-958\n+TGAGATTCTTCTATTCTACTTT\n+>dme-miR-375\n+TTTGTTCGTTTGGCTTAAGTTA\n+>dme-miR-959\n+TTGTCATCGGGGGTATTATGAA\n+>dme-miR-960\n+TGAGTATTCCAGATTGCATAGC\n+>dme-miR-961\n+TTTGATCACCAGTAACTGAGAT\n+>dme-miR-962\n+ATAAGGTAGAGAAATTGATGCTGTC\n+>dme-miR-963\n+ACAAGGTAAATATCAGGTTGTTTC\n+>dme-miR-964\n+TTAGAATAGGGGAGCTTAACTT\n+>dme-miR-932\n+TCAATTCCGTAGTGCATTGCAG\n+>dme-miR-965\n+TAAGCGTATAGCTTTTCCCCTT\n+>dme-miR-966\n+TGTGGGTTGTGGGCTGTGTGG\
n+>dme-miR-967\n+AGAGATACCTCTGGAGAAGCG\n+>dme-miR-1002\n+TTAAGTAGTGGATACAAAGGGCGA\n+>dme-miR-968\n+TAAGTAGTATCCATTAAAGGGTTG\n+>dme-miR-969\n+GAGTTCCACTAAGCAAGTTTT\n+>dme-miR-970\n+TCATAAGACACACGCGGCTAT\n+>dme-miR-971\n+TTGGTGTTACTTCTTACAGTGA\n+>dme-miR-972\n+TGTACAATACGAATATTTAGGC\n+>dme-miR-973\n+TGGTTGGTGGTTGAACTTCGATTTT\n+>dme-miR-974\n+AAGCGAGCAAAGAAGTAGTATT\n+>dme-miR-975\n+TAAACACTTCCTACATCCTGTAT\n+>dme-miR-976\n+TTGGATTAGTTATCATCAATGC\n+>dme-miR-977\n+TGAGATATTCACGTTGTCTAA\n+>dme-miR-978\n+TGTCCAGTGCCGTAAATTGCAG\n+>dme-miR-979\n+TTCTTCCCGAACTCAGGCTAA\n+>dme-miR-980\n+TAGCTGCCTTGTGAAGGGCTTA\n+>dme-miR-981\n+TTCGTTGTCGACGAAACCTGCA\n+>dme-miR-982\n+TCCTGGACAAATATGAAGTAAAT\n+>dme-miR-983\n+ATAATACGTTTCGAACTAATGA\n+>dme-miR-984\n+TGAGGTAAATACGGTTGGAATTT\n+>dme-miR-927\n+TTTAGAATTCCTACGCTTTACC\n+>dme-miR-985\n+CAAATGTTCCAATGGTCGGGCA\n+>dme-miR-986\n+TCTCGAATAGCGTTGTGACTGA\n+>dme-miR-987\n+TAAAGTAAATAGTCTGGATTGATG\n+>dme-miR-988\n+CCCCTTGTTGCAAACCTCACGC\n+>dme-miR-989\n+TGTGATGTGACGTAGTGGAAC\n+>dme-miR-137\n+TATTGCTTGAGAATACACGTAG\n+>dme-miR-990\n+ATTCACCGTTCTGAGTTGGCC\n+>dme-miR-991\n+TTAAAGTTGTAGTTTGGAAAGT\n+>dme-miR-992\n+AGTACACGTTTCTGGTACTAAG\n+>dme-miR-929\n+CTCCCTAACGGAGTCAGATTG\n+>dme-miR-993\n+GAAGCTCGTCTCTACAGGTATCT\n+>dme-miR-994\n+CTAAGGAAATAGTAGCCGTGAT\n+>dme-miR-995\n+TAGCACCACATGATTCGGCTT\n+>dme-miR-996\n+TGACTAGATTTCATGCTCGTCT\n+>dme-miR-252\n+CTAAGTACTAGTGCCGCAGGAG\n+>dme-miR-997\n+CCCAAACTCGAAGGAGTTTCA\n+>dme-miR-998\n+TAGCACCATGAGATTCAGCTC\n+>dme-miR-999\n+TGTTAACTGTAAGACTGTGTCT\n+>dme-miR-1000\n+ATATTGTCCTGTCACAGCAGT\n+>dme-miR-1001\n+TGGGTAAACTCCCAAGGATCA\n+>dme-miR-1003\n+TCTCACATTTACATATTCACAG\n+>dme-miR-1004\n+TCTCACATCACTTCCCTCACAG\n+>dme-miR-1005\n+TCTGGAATCTTTAATTCGCAG\n+>dme-miR-1006\n+TAAATTCGATTTCTTATTCATAG\n+>dme-miR-1007\n+TAAGCTCAATTAACTGTTTGCA\n+>dme-miR-1008\n+TCACAGCTTTTTGTGTTTACA\n+>dme-miR-1009\n+TCTCAAAAATTGTTACATTTCAG\n+>dme-miR-1010\n+TTTCACCTATCGTTCCATTTGCAG\n+>dme-miR-1011\n+TTATTGGTTCAAATCGCTCGCAG\n+>dme-miR-1012\n+TTAGTCA
AAGATTTTCCCCATAG\n+>dme-miR-1013\n+ATAAAAGTATGCCGAACTCG\n+>dme-miR-1014\n+AAAATTCATTTTCATTTGCAG\n+>dme-miR-1015\n+TCCTGGGACATCTCTCTTGCAG\n+>dme-miR-1016\n+TTCACCTCTCTCCATACTTAG\n+>dme-miR-1017\n+GAAAGCTCTACCCAAACTCATCC\n+>dme-miR-2279\n+TTTCACGCGAAGATATTTATTT\n+>dme-miR-2280\n+TCTTAGCTTGGCAATAAAATAT\n+>dme-miR-2281\n+TATCTGTATCTGCAGTATTGC\n+>dme-miR-2282\n+ATCGGTGAGCTAAAAATAGAAT\n+>dme-miR-2283\n+GAAAATATCATGAATACGACAAT\n'
b
diff -r 000000000000 -r eaac585f172a test-data/mature_ref_this_species.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mature_ref_this_species.fa Tue Jan 27 09:06:24 2015 -0500
b
@@ -0,0 +1,14 @@
+>cel-miR-36
+TCACCGGGTGAAAATTCGCATG
+>cel-miR-37
+TCACCGGGTGAACACTTGCAGT
+>cel-miR-38
+TCACCGGGAGAAAAACTGGAGT
+>cel-miR-39
+TCACCGGGTGTAAATCAGCTTG
+>cel-miR-40
+TCACCGGGTGTACATCAGCTAA
+>cel-miR-41
+TCACCGGGTGAAAAATCACCTA
+>cel-miR-229
+AATGACACTGGTTATCTTTTCCATCG
b
diff -r 000000000000 -r eaac585f172a test-data/output.mrd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.mrd Tue Jan 27 09:06:24 2015 -0500
b
b'@@ -0,0 +1,2108 @@\n+>chrII:11534525-11540624_7\n+score total\t\t      102170.8\n+score for star read(s)\t           3.9\n+score for read counts\t      102159.9\n+score for mfe\t\t           2.4\n+score for randfold\t           1.6\n+score for cons. seed\t             3\n+miRNA with same seed\t    cbr-miR-35\n+total read count\t        200394\n+mature read count\t        200381\n+loop read count\t\t             0\n+star read count\t\t            13\n+exp             fffffffffffffffffffffffffffffffSSSSSSSSSSSSSSSSSSSSSSSllllllllllllllllMMMMMMMMMMMMMMMMMMMMMMffffffffffffffffff\n+obs             fffffffffffffffffffffffffffffffSSSSSSSSSSSSSSSSSSSSSSSllllllllllllllllMMMMMMMMMMMMMMMMMMMMMMffffffffffffffffff\n+pri_seq         caacuauuauucucggaucagaucgagccauugcugguuucuuccacagugguacuuuccauuagaacuaucaccggguggaaacuagcaguggcucgaucuuuuccacu\n+pri_struct      ..............(((..(((((((((((((((((((((((.((.(.((((((.(((......))).)))))).))).)))))))))))))))))))))))..)))...\t#MM\n+seq_377956_x1   ...............................ugcugguuucuuccacagugg..........................................................\t0\n+seq_377882_x1   ...............................ugAugguuucuuccacaguggu.........................................................\t1\n+seq_375231_x11  ...............................ugcugguuucuuccacaguggua........................................................\t0\n+seq_377915_x1   ................................................................gaacuaucaccggguggaaacuagcag...................\t0\n+seq_378313_x1   ....................................................................Aaucaccggguggaaacuag......................\t1\n+seq_378070_x1   ....................................................................Aaucaccggguggaaacuagc.....................\t1\n+seq_374207_x17  ....................................................................Aaucaccggguggaaacuagcag...................\t1\n+seq_365952_x99  
....................................................................Aaucaccggguggaaacuagcagu..................\t1\n+seq_377777_x1   ....................................................................uCucaccggguggaaacuagcagu..................\t1\n+seq_377818_x1   .....................................................................Cucaccggguggaaacuagca....................\t1\n+seq_378115_x1   .....................................................................aucaccggguggaaacuagca....................\t0\n+seq_377479_x2   .....................................................................aucaccggguggaaacuagcag...................\t0\n+seq_377175_x3   .....................................................................Cucaccggguggaaacuagcag...................\t1\n+seq_377327_x2   .....................................................................Uucaccggguggaaacuagcag...................\t1\n+seq_369310_x61  .....................................................................Cucaccggguggaaacuagcagu..................\t1\n+seq_377793_x1   .....................................................................aucaccgggGggaaacuagcagu..................\t1\n+seq_377962_x1   .....................................................................aucaAcggguggaaacuagcagu..................\t1\n+seq_378051_x1   .....................................................................aucaccggguggaaaAuagcagu..................\t1\n+seq_378323_x1   .....................................................................aucaccggguggaaacuagAagu..................\t1\n+seq_371516_x35  .....................................................................aucaccggguggaaacuagcagu..................\t0\n+seq_377055_x3   .....................................................................aucaccggguggaaacuagcagC..................\t1\n+seq_377970_x1   .....................................................................aucaccggguggaaacuagcagG..................\t1\n+seq_377511_x2   
.....................................................................Uucaccggguggaaacuagcagu..................\t1\n+seq_378079_x1   ..................................................'..b'..............................\t1\n+seq_377854_x1   ............................ugguuuuucucugcagugauaga.............................................................\t0\n+seq_375838_x9   .................................................gauacuucuaacaacucgcua..........................................\t0\n+seq_377941_x1   ....................................................................uaucaccgggugaaaaaucaccu.....................\t0\n+seq_378320_x1   .....................................................................aucaccgggugaaaaaucacAu.....................\t1\n+seq_378280_x1   .....................................................................aucaccggguAaaaaaucaccu.....................\t1\n+seq_376006_x8   .....................................................................aucaccgggugaaaaaucaccu.....................\t0\n+seq_376879_x4   .....................................................................aucaccgggugaaaaaucaccuU....................\t1\n+seq_375615_x10  .....................................................................aucaccgggugaaaaaucaccua....................\t0\n+seq_378108_x1   .....................................................................aucaccgggGgaaaaaucaccua....................\t1\n+seq_376851_x4   ......................................................................ucaccgggugaaaaaucaccu.....................\t0\n+seq_377803_x1   ......................................................................ucaccgggugaaCaaucaccua....................\t1\n+seq_378243_x1   ......................................................................ucaccgggugaaaGaucaccua....................\t1\n+seq_378091_x1   ......................................................................ucaccgAgugaaaaaucaccua....................\t1\n+seq_377871_x1   
......................................................................ucaccgggugaaaaauAaccua....................\t1\n+seq_377853_x1   ......................................................................uAaccgggugaaaaaucaccua....................\t1\n+seq_378138_x1   ......................................................................ucaccgggugCaaaaucaccua....................\t1\n+seq_378123_x1   ......................................................................uUaccgggugaaaaaucaccua....................\t1\n+seq_377052_x3   ......................................................................ucaccgggGgaaaaaucaccua....................\t1\n+cel-miR-41      ......................................................................ucaccgggugaaaaaucaccua....................\t0\n+seq_378249_x1   ......................................................................ucaccgggugaaaaaucaUcua....................\t1\n+seq_372631_x27  ......................................................................ucaccgggugaaaaaucaccua....................\t0\n+seq_377820_x1   ......................................................................ucaccAggugaaaaaucaccua....................\t1\n+seq_377247_x3   ......................................................................ucaccgggugaaaaaucaccuaU...................\t1\n+seq_377331_x2   ......................................................................ucaccgggugaaaaaucaccuaA...................\t1\n+seq_377731_x2   .......................................................................caccgggugaaaaaucaccua....................\t0\n+seq_377439_x2   .......................................................................caccgggugaaaaaucaccuaU...................\t1\n+seq_377980_x1   ........................................................................accgggCgaaaaaucaccua....................\t1\n+seq_376395_x6   
........................................................................accgggugaaaaaucaccua....................\t0\n+seq_378083_x1   .........................................................................ccgggugaaaaaucaccua....................\t0\n+seq_377991_x1   ..........................................................................cgggugaaaaaucaccua....................\t0\n+\n+\n+\n'
b
diff -r 000000000000 -r eaac585f172a test-data/precursors_ref_this_species.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/precursors_ref_this_species.fa Tue Jan 27 09:06:24 2015 -0500
b
@@ -0,0 +1,14 @@
+>cel-mir-36
+CACCGCTGTCGGGGAACCGCGCCAATTTTCGCTTCAGTGCTAGACCATCCAAAGTGTCTATCACCGGGTGAAAATTCGCATGGGTCCCCGACGCGGA
+>cel-mir-37
+TTCTAGAAACCCTTGGACCAGTGTGGGTGTCCGTTGCGGTGCTACATTCTCTAATCTGTATCACCGGGTGAACACTTGCAGTGGTCCTCGTGGTTTCT
+>cel-mir-38
+TCTGTGAGCCAGGTCCTGTTCCGGTTTTTTCCGTGGTGATAACGCATCCAAAAGTCTCTATCACCGGGAGAAAAACTGGAGTAGGACCTGTGACTCAT
+>cel-mir-39
+TATACCGAGAGCCCAGCTGATTTCGTCTTGGTAATAAGCTCGTCATTGAGATTATCACCGGGTGTAAATCAGCTTGGCTCTGGTGTC
+>cel-mir-40
+TCCTGTCCGCACCTCAGTGGATGTATGCCATGATGATAAGATATCAGAAATCCTATCACCGGGTGTACATCAGCTAAGGTGCGGGTACAGGT
+>cel-mir-41
+GGGTCCCAGAGACCTTGGTGGTTTTTCTCTGCAGTGATAGATACTTCTAACAACTCGCTATCACCGGGTGAAAAATCACCTAGGTCTGGAGCCTCCT
+>cel-mir-229
+CGCCGGCAATGACACTGGTTATCTTTTCCATCGTGGAATGCCCCCCATTGATTTTTTCCCCTTTTCGGGGGGAAAAAATTGGAAACGAGAAAGGTATCGGGTGTCATAGCCGGCG
b
diff -r 000000000000 -r eaac585f172a test-data/reads_collapsed.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/reads_collapsed.fa Tue Jan 27 09:06:24 2015 -0500
b
b'@@ -0,0 +1,3220 @@\n+>seq_0_x149538\n+TCACCGGGTGGAAACTAGCAGT\n+>seq_149538_x90904\n+TCACCGGGTGAACACTTGCAGT\n+>seq_240442_x25577\n+TCACCGGGTGTACATCAGCTAA\n+>seq_266019_x11571\n+TCACCGGGTGAAAATTCGCATG\n+>seq_277590_x9943\n+TCACCGGGTGGAAACTAGCAG\n+>seq_287533_x5429\n+TCACCGGGTGGAAACTAGCAGTT\n+>seq_292962_x4000\n+AATGACACTGGTTATCTTTTCCATCG\n+>seq_296962_x3796\n+TCACCGGGTGAACACTTGCAG\n+>seq_300758_x2851\n+TCACCGGGTGAACACTTGCAGTT\n+>seq_303609_x2808\n+TCACCGGGGGGAAACTAGCAGT\n+>seq_306417_x2793\n+TCACAGGGTGGAAACTAGCAGT\n+>seq_309210_x2159\n+TCACCGGGTGGAAACTAGCAGTA\n+>seq_311369_x2128\n+TCACCGGGTGTAAATCAGCTTG\n+>seq_313497_x2021\n+TCACCGGGTGGAAACTAGCA\n+>seq_315518_x1866\n+TCACCGGGCGGAAACTAGCAGT\n+>seq_317384_x1629\n+TCACCGGGAGGAAACTAGCAGT\n+>seq_319013_x1597\n+TCACAGGGTGAACACTTGCAGT\n+>seq_320610_x1349\n+TCACCGGGAGAAAAACTGGAGT\n+>seq_321959_x1326\n+TCACCGGGTGGAAAATAGCAGT\n+>seq_323285_x1320\n+TCACCGGGGGAACACTTGCAGT\n+>seq_324605_x1295\n+TCACCGGGTGGANACTAGCAGT\n+>seq_325900_x1251\n+TCACCGGGTGAACACTTGCAGTA\n+>seq_327151_x1237\n+TCACCGGGTGAACACTTGCA\n+>seq_328388_x1017\n+TCACCGGGTGTACATCAGCTA\n+>seq_329405_x990\n+ACCGGGTGGAAACTAGCAGT\n+>seq_330395_x911\n+TCACCGGGTGAANACTTGCAGT\n+>seq_331306_x853\n+TCACCGGGCGAACACTTGCAGT\n+>seq_332159_x841\n+TCACCGGGAGAACACTTGCAGT\n+>seq_333000_x757\n+TCACCGCGTGGAAACTAGCAGT\n+>seq_333757_x746\n+ACCGGGTGAACACTTGCAGT\n+>seq_334503_x690\n+TCACCGGGTGGAAACTAGCAGC\n+>seq_335193_x686\n+TCACCGGGTGAAAACTTGCAGT\n+>seq_335879_x674\n+TCACCGCGTGAACACTTGCAGT\n+>seq_336553_x669\n+TCACCGGGTGGAAACTAGCAGA\n+>seq_337222_x632\n+AGCTGATTTCGTCTTGGTAATA\n+>seq_337854_x604\n+TCAACGGGTGGAAACTAGCAGT\n+>seq_338458_x598\n+TCACCGGGTGGAAACTATCAGT\n+>seq_339056_x557\n+TCACCGGGTGGAAACTAGCAGG\n+>seq_339613_x541\n+TCACCGGGTGTACATCAGCTAAA\n+>seq_340154_x508\n+TCACCGGGTGAACACTGGCAGT\n+>seq_340662_x508\n+TCACCGGGTGGACACTAGCAGT\n+>seq_341170_x448\n+TCACCGGGTGAACACTTTCAGT\n+>seq_341618_x427\n+TCACCGGGTGAACAATTGCAGT\n+>seq_342045_x391\n+TCACCGGGTGAACACTTGCCGT\n+>seq_342436_x389\n+
TCACCGGGTGGAAACTTGCAGT\n+>seq_342825_x384\n+TCACAGGGTGTACATCAGCTAA\n+>seq_343209_x360\n+TCACCGGGTGAAAACTAGCAGT\n+>seq_343569_x355\n+TCACCGGGTGTACATCAGCTAAT\n+>seq_343924_x352\n+TCACCGGGTGCAAACTAGCAGT\n+>seq_344276_x345\n+TCACCGGGTGGAGACTAGCAGT\n+>seq_344621_x340\n+TCAACGGGTGAACACTTGCAGT\n+>seq_344961_x338\n+TCACCGGGTGTACATCAGCTAT\n+>seq_345299_x327\n+TCACCGGGTGAACACTTGCTGT\n+>seq_345626_x322\n+TCACCGGGTGGAAATTAGCAGT\n+>seq_345948_x316\n+TCACCGGGTGAAAATTCGCATT\n+>seq_346264_x309\n+TCACCGGGTGGAAACTAGCCGT\n+>seq_346573_x304\n+TCACCGGGTGGTAACTAGCAGT\n+>seq_346877_x304\n+TCACCGGGTGGAAACTAGCTGT\n+>seq_347181_x298\n+TCACAGGGTGAAAATTCGCATG\n+>seq_347479_x287\n+TCACCGGGGGTACATCAGCTAA\n+>seq_347766_x287\n+TAACCGGGTGGAAACTAGCAGT\n+>seq_348053_x285\n+CACCGGGTGAACACTTGCAGT\n+>seq_348338_x283\n+TCACCGGGTGGAAACTAGTAGT\n+>seq_348621_x282\n+TCACCGGGTGGAAACTCGCAGT\n+>seq_348903_x271\n+TCACCGGGTGCACATCAGCTAA\n+>seq_349174_x270\n+TCACCGGGTGAACACTTGCAGC\n+>seq_349444_x269\n+TCACCTGGTGGAAACTAGCAGT\n+>seq_349713_x268\n+TCACCGGGTGTANATCAGCTAA\n+>seq_349981_x265\n+TCACCGGGTTGAAACTAGCAGT\n+>seq_350246_x264\n+TCACCGGGTGGCAACTAGCAGT\n+>seq_350510_x263\n+TCACCGGGTGGAAACTACCAGT\n+>seq_350773_x263\n+TCACCGGGTGGAAACTAGAAGT\n+>seq_351036_x251\n+TCACCGGGGGAAAATTCGCATG\n+>seq_351287_x247\n+TCACCGGGTGGACATCAGCTAA\n+>seq_351534_x242\n+TCACCGGGTGAACACTTGCAGG\n+>seq_351776_x240\n+ACCGGGTGTACATCAGCTAA\n+>seq_352016_x236\n+TCACCGGGTGGAAACTAGNAGT\n+>seq_352252_x230\n+TCACCGGGAGAAAAACTGGAGTT\n+>seq_352482_x229\n+TCACCGGGTGAAAATTCGCAT\n+>seq_352711_x226\n+TCACCGGGTGAAGACTTGCAGT\n+>seq_352937_x224\n+TCACCGGGCGTACATCAGCTAA\n+>seq_353161_x223\n+TCACCGGGTGGAAACTNGCAGT\n+>seq_353384_x221\n+CACCGGGTGAACACTTGCAGTT\n+>seq_353605_x219\n+TCACCGGATGGAAACTAGCAGT\n+>seq_353824_x217\n+TCACCGGGTGGAAACTAGCATT\n+>seq_354041_x214\n+TAACCGGGTGAACACTTGCAGT\n+>seq_354255_x214\n+TCACCGGGTGTAAATCAGCTTT\n+>seq_354469_x212\n+TCACCGGGTTAACACTTGCAGT\n+>seq_354681_x211\n+TCAGCGGGTGGAAACTAGCAGT\n+>seq_354892_x210\n+TCACCGGGTGCACACTTGCAGT
\n+>seq_355102_x209\n+TCACCGGCTGGAAACTAGCAGT\n+>seq_355311_x207\n+TCACCGGCTGAACACTTGCAGT\n+>seq_355518_x204\n+TCATCGGGTGGAAACTAGCAGT\n+>seq_355722_x204\n+TTACCGGGTGGAAACTAGCAGT\n+>seq_355926'..b'ACCGGGTGAACACTTGCGGT\n+>seq_378231_x1\n+TCACCGGGTGTACATCAGGT\n+>seq_378232_x1\n+TGTGGGTCTCCGTTGCGGTGCTA\n+>seq_378233_x1\n+TCCGGGTGAACACTTGCAGT\n+>seq_378234_x1\n+TCACCGGGTTTAAATCAGCTTG\n+>seq_378235_x1\n+TCACCAGGTGTACATCAGCTA\n+>seq_378236_x1\n+CTTAGTCAATTCCTCAAC\n+>seq_378237_x1\n+ACCGGGTGCACACTTGCAGT\n+>seq_378238_x1\n+TCACCGGGTGAACACTCGCA\n+>seq_378239_x1\n+GAAAAGGGGAGCAGAACGAAAAT\n+>seq_378240_x1\n+TCACCGGGTGTACATGAGCTA\n+>seq_378241_x1\n+TCGGGTGAACACTTGCAG\n+>seq_378242_x1\n+TCACCGGGATAAAAACTGGAGT\n+>seq_378243_x1\n+TCACCGGGTGAAAGATCACCTA\n+>seq_378244_x1\n+ACCGCGTGTACATCAGCTAA\n+>seq_378245_x1\n+AAGAGAATAGAACCGACGATAT\n+>seq_378246_x1\n+TCACCGGGTGAACACTTGCAGTGTT\n+>seq_378247_x1\n+ACCGGGTGGAAACTAGCAAT\n+>seq_378248_x1\n+TAACCGGGTGTAAATCAGCTTG\n+>seq_378249_x1\n+TCACCGGGTGAAAAATCATCTA\n+>seq_378250_x1\n+CACCGGGTGAACACTTGCAGTTG\n+>seq_378251_x1\n+ACCGGGTGGAAACTAGTAGT\n+>seq_378252_x1\n+TCACCGGGTGCAAACTAGCAGTG\n+>seq_378253_x1\n+TCACCGGGAGAAAAAGTGGAGT\n+>seq_378254_x1\n+GCCACTTTTCGCTTCAGTGCTA\n+>seq_378255_x1\n+ACCGGATGAAAATTCGCATG\n+>seq_378256_x1\n+TCACCGGGTGTAAATCAACT\n+>seq_378257_x1\n+GCCAATTTTCGCTTCAATGCTA\n+>seq_378258_x1\n+CAGGGTGTACATCAGCTAA\n+>seq_378259_x1\n+TCACCGGGAGTACATCAGCTAAG\n+>seq_378260_x1\n+TCACCGGGTGTAAATTAGCTT\n+>seq_378261_x1\n+TCACCGGGTGAACACTTGCAGTGGT\n+>seq_378262_x1\n+TCACCGGGTGTATATCAGCTTG\n+>seq_378263_x1\n+CACCGGGTGAAGACTTGCAGT\n+>seq_378264_x1\n+ACCGAGTGAAAATTCGCATG\n+>seq_378265_x1\n+ATCACCGGGTGAACCCTTGCAGT\n+>seq_378266_x1\n+GGTGGTCTTTCTCTGCAGTGATA\n+>seq_378267_x1\n+ACCGGGTGTACATCAGCTAC\n+>seq_378268_x1\n+TCACCGGGACAAAAACTGGAGT\n+>seq_378269_x1\n+TCACCGGGTGAAGATTCGCAT\n+>seq_378270_x1\n+ACACCGGGTGAACACTTGCA\n+>seq_378271_x1\n+TAACCGGGTGAAAATTCGCAT\n+>seq_378272_x1\n+ACCGAGTGGAAACTAGCAG\n+>seq_378273_x1\n+TGTGGGTGTCCGTTGCGGTG\n+>seq_3782
74_x1\n+TCACCTGGTGTACATCAGCT\n+>seq_378275_x1\n+AGCCGATTTCGTCTTGGTAATA\n+>seq_378276_x1\n+TCACCGGGTGAACACTTGCAGTTG\n+>seq_378277_x1\n+TCACCGGGAGAAAAATTGGAG\n+>seq_378278_x1\n+ATCACCGGGTGTACATCACCTA\n+>seq_378279_x1\n+TCAGCGGGTGTACATCAGCTA\n+>seq_378280_x1\n+ATCACCGGGTAAAAAATCACCT\n+>seq_378281_x1\n+GAAAAGTGGAGCAGAACGAAA\n+>seq_378282_x1\n+ACCGGGAGGAAAACTGGAGT\n+>seq_378283_x1\n+GGGGGTTTTTCTCTGCAGTGATA\n+>seq_378284_x1\n+TCACCGGGTGAACAGTTGC\n+>seq_378285_x1\n+AACGAAAAGGGGATCTGGCACT\n+>seq_378286_x1\n+TGTGGGGGTCCGTTGCGGT\n+>seq_378287_x1\n+TCACCGGGTGGACATCAGC\n+>seq_378288_x1\n+ATCACCGGGGGAACACTTGCAG\n+>seq_378289_x1\n+ACCGGGTGAACACCTGCAG\n+>seq_378290_x1\n+AGCTGATTTCGTCTTGGTAGTA\n+>seq_378291_x1\n+TCACCGGGTGTACATCAGCTAAGG\n+>seq_378292_x1\n+ACCGGGCGTAAATCAGCTTG\n+>seq_378293_x1\n+GTTTTGATCGTGAGGCCCCTTC\n+>seq_378294_x1\n+ACCGGGTGGGAACTAGCA\n+>seq_378295_x1\n+TCACCGTGTGAACACTTG\n+>seq_378296_x1\n+TCACCGGTTGTACATCAG\n+>seq_378297_x1\n+TCACCGGGAGTAAATCAGCTT\n+>seq_378298_x1\n+ACCGGGTGAACACTTGCCG\n+>seq_378299_x1\n+TCACCGGGTGAACGCTTGCA\n+>seq_378300_x1\n+CCCCGGGTGGAAACTAGCAGT\n+>seq_378301_x1\n+TCACCGGGCGTAAATCAGCTT\n+>seq_378302_x1\n+CGACAATTTTCGCTTCAGTGCTA\n+>seq_378303_x1\n+CACCGGGTGAACACTTGCAGTGC\n+>seq_378304_x1\n+ACGGTATCCGGTCTCGACA\n+>seq_378305_x1\n+TCAACGGGAGAAAAACTGGAG\n+>seq_378306_x1\n+ACGGGGTGTACATCAGCTAA\n+>seq_378307_x1\n+ACCGGCTGAACACTTGCAGT\n+>seq_378308_x1\n+TCATCGGGAGAAAAACTGGAGT\n+>seq_378309_x1\n+TCACCGGGTGAACACTTACAG\n+>seq_378310_x1\n+TCACCGGGTGTACATCAGCTG\n+>seq_378311_x1\n+CCGGGTGTACATCAGCTA\n+>seq_378312_x1\n+AGTGTGGGTGTCCGTTGCGGTGCTA\n+>seq_378313_x1\n+AATCACCGGGTGGAAACTAG\n+>seq_378314_x1\n+ACCGGGTGGAAACAAGCAGT\n+>seq_378315_x1\n+TGTGGGTGTCCGTTGCGGCGCTA\n+>seq_378316_x1\n+CACCGGGTGTACATCACCTAA\n+>seq_378317_x1\n+TAACCGGGTGAACACTTG\n+>seq_378318_x1\n+ACCGGGAGAAAAACTGAAGT\n+>seq_378319_x1\n+TCACCGGGTGGAAACTAGNAGTG\n+>seq_378320_x1\n+ATCACCGGGTGAAAAATCACAT\n+>seq_378321_x1\n+TCACCCGGTGAACACTTGCA\n+>seq_378322_x1\n+TGTGGGTGTCCGTTGCGGTGATA\n+>seq_3
78323_x1\n+ATCACCGGGTGGAAACTAGAAGT\n+>seq_378324_x1\n+TCACCGGGTGTACACCAGCT\n+>seq_378325_x1\n+ACCGGGTGGGAACTAGCAG\n+>seq_378326_x1\n+TCACCGGGTGAAAACTAGC\n+>seq_378327_x1\n+TCACCGGGTGAACACTTGAA\n+>seq_378328_x1\n+TCAGCGGGTGTAAATCAGC\n+>seq_378329_x1\n+CCGGGTGGAAACTAGCAGTGGCT\n+>seq_378330_x1\n+AGTGGTTGTATGCCATGATGATA\n+>seq_378331_x1\n+NCACCGGGTGGAAACTAGCAG\n+>seq_378332_x1\n+TCACCGGGTGTAAATCATC\n'
b
diff -r 000000000000 -r eaac585f172a test-data/reads_collapsed_vs_genome.arf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/reads_collapsed_vs_genome.arf Tue Jan 27 09:06:24 2015 -0500
b
b'@@ -0,0 +1,481 @@\n+seq_0_x149538\t22\t1\t22\ttcaccgggtggaaactagcagt\tchrII:11534525-11540624\t22\t3060\t3081\ttcaccgggtggaaactagcagt\t+\t0\tmmmmmmmmmmmmmmmmmmmmmm\n+seq_149538_x90904\t22\t1\t22\ttcaccgggtgaacacttgcagt\tchrII:11534525-11540624\t22\t3285\t3306\ttcaccgggtgaacacttgcagt\t+\t0\tmmmmmmmmmmmmmmmmmmmmmm\n+seq_240442_x25577\t22\t1\t22\ttcaccgggtgtacatcagctaa\tchrII:11534525-11540624\t22\t3631\t3652\ttcaccgggtgtacatcagctaa\t+\t0\tmmmmmmmmmmmmmmmmmmmmmm\n+seq_266019_x11571\t22\t1\t22\ttcaccgggtgaaaattcgcatg\tchrII:11534525-11540624\t22\t3165\t3186\ttcaccgggtgaaaattcgcatg\t+\t0\tmmmmmmmmmmmmmmmmmmmmmm\n+seq_277590_x9943\t21\t1\t21\ttcaccgggtggaaactagcag\tchrII:11534525-11540624\t21\t3060\t3080\ttcaccgggtggaaactagcag\t+\t0\tmmmmmmmmmmmmmmmmmmmmm\n+seq_287533_x5429\t22\t1\t22\ttcaccgggtggaaactagcagt\tchrII:11534525-11540624\t22\t3060\t3081\ttcaccgggtggaaactagcagt\t+\t0\tmmmmmmmmmmmmmmmmmmmmmm\n+seq_292962_x4000\t26\t1\t26\taatgacactggttatcttttccatcg\tchrIII:2172325-2172669\t26\t123\t148\taatgacactggttatcttttccatcg\t+\t0\tmmmmmmmmmmmmmmmmmmmmmmmmmm\n+seq_296962_x3796\t21\t1\t21\ttcaccgggtgaacacttgcag\tchrII:11534525-11540624\t21\t3285\t3305\ttcaccgggtgaacacttgcag\t+\t0\tmmmmmmmmmmmmmmmmmmmmm\n+seq_300758_x2851\t22\t1\t22\ttcaccgggtgaacacttgcagt\tchrII:11534525-11540624\t22\t3285\t3306\ttcaccgggtgaacacttgcagt\t+\t0\tmmmmmmmmmmmmmmmmmmmmmm\n+seq_309210_x2159\t22\t1\t22\ttcaccgggtggaaactagcagt\tchrII:11534525-11540624\t22\t3060\t3081\ttcaccgggtggaaactagcagt\t+\t0\tmmmmmmmmmmmmmmmmmmmmmm\n+seq_311369_x2128\t22\t1\t22\ttcaccgggtgtaaatcagcttg\tchrII:11534525-11540624\t22\t3535\t3556\ttcaccgggtgtaaatcagcttg\t+\t0\tmmmmmmmmmmmmmmmmmmmmmm\n+seq_313497_x2021\t20\t1\t20\ttcaccgggtggaaactagca\tchrII:11534525-11540624\t20\t3060\t3079\ttcaccgggtggaaactagca\t+\t0\tmmmmmmmmmmmmmmmmmmmm\n+seq_320610_x1349\t22\t1\t22\ttcaccgggagaaaaactggagt\tchrII:11534525-11540624\t22\t3382\t3403\ttcaccgggagaaaaactggagt\t+\t0\tmmmmmmmmmmmmmmmmmmmmmm\n+seq_325900_x1251\t22\t1\t22\ttcaccgggtgaacac
ttgcagt\tchrII:11534525-11540624\t22\t3285\t3306\ttcaccgggtgaacacttgcagt\t+\t0\tmmmmmmmmmmmmmmmmmmmmmm\n+seq_327151_x1237\t20\t1\t20\ttcaccgggtgaacacttgca\tchrII:11534525-11540624\t20\t3285\t3304\ttcaccgggtgaacacttgca\t+\t0\tmmmmmmmmmmmmmmmmmmmm\n+seq_328388_x1017\t21\t1\t21\ttcaccgggtgtacatcagcta\tchrII:11534525-11540624\t21\t3631\t3651\ttcaccgggtgtacatcagcta\t+\t0\tmmmmmmmmmmmmmmmmmmmmm\n+seq_329405_x990\t20\t1\t20\taccgggtggaaactagcagt\tchrII:11534525-11540624\t20\t3062\t3081\taccgggtggaaactagcagt\t+\t0\tmmmmmmmmmmmmmmmmmmmm\n+seq_333757_x746\t20\t1\t20\taccgggtgaacacttgcagt\tchrII:11534525-11540624\t20\t3287\t3306\taccgggtgaacacttgcagt\t+\t0\tmmmmmmmmmmmmmmmmmmmm\n+seq_334503_x690\t21\t1\t21\ttcaccgggtggaaactagcag\tchrII:11534525-11540624\t21\t3060\t3080\ttcaccgggtggaaactagcag\t+\t0\tmmmmmmmmmmmmmmmmmmmmm\n+seq_336553_x669\t21\t1\t21\ttcaccgggtggaaactagcag\tchrII:11534525-11540624\t21\t3060\t3080\ttcaccgggtggaaactagcag\t+\t0\tmmmmmmmmmmmmmmmmmmmmm\n+seq_337222_x632\t22\t1\t22\tagctgatttcgtcttggtaata\tchrII:11534525-11540624\t22\t3495\t3516\tagctgatttcgtcttggtaata\t+\t0\tmmmmmmmmmmmmmmmmmmmmmm\n+seq_339056_x557\t21\t1\t21\ttcaccgggtggaaactagcag\tchrII:11534525-11540624\t21\t3060\t3080\ttcaccgggtggaaactagcag\t+\t0\tmmmmmmmmmmmmmmmmmmmmm\n+seq_339613_x541\t22\t1\t22\ttcaccgggtgtacatcagctaa\tchrII:11534525-11540624\t22\t3631\t3652\ttcaccgggtgtacatcagctaa\t+\t0\tmmmmmmmmmmmmmmmmmmmmmm\n+seq_342045_x391\t22\t1\t22\ttcaccgggtgaacacttgccgt\tchrII:11534525-11540624\t22\t3285\t3306\ttcaccgggtgaacacttgcagt\t+\t1\tmmmmmmmmmmmmmmmmmmmMmm\n+seq_343569_x355\t22\t1\t22\ttcaccgggtgtacatcagctaa\tchrII:11534525-11540624\t22\t3631\t3652\ttcaccgggtgtacatcagctaa\t+\t0\tmmmmmmmmmmmmmmmmmmmmmm\n+seq_344961_x338\t21\t1\t21\ttcaccgggtgtacatcagcta\tchrII:11534525-11540624\t21\t3631\t3651\ttcaccgggtgtacatcagcta\t+\t0\tmmmmmmmmmmmmmmmmmmmmm\n+seq_345299_x327\t22\t1\t22\ttcaccgggtgaacacttgctgt\tchrII:11534525-11540624\t22\t3285\t3306\ttcaccgggtgaacacttgcagt\t+\t1\tmmmmmmmmmmmmmmmmmmmMmm\n+se
q_345948_x316\t21\t1\t21\ttcaccgggtgaaaattcgcat\tchrII:11534525-11540624\t21\t3165\t3185\ttcaccgggtgaaaattcgcat\t+\t0\tmmmmmmmmmmmmmmmmmmmmm\n+seq_346264_x309\t22\t1\t22\ttcaccgggtggaaactagccgt\tchrII:11534525-11540624\t22\t3060\t3081\ttcaccgggtggaaactagcagt\t+\t1\tmmmmmmmmmmmmmmmmmmmMmm\n+seq_346877_x304\t22\t1\t22\ttcaccgggtggaaactagctgt\tchrII:11534525-11540624\t22\t3060\t3081\ttcac'..b'mm\n+seq_378218_x1\t23\t1\t23\ttcaccgggtggaaactagcagcg\tchrII:11534525-11540624\t23\t3060\t3082\ttcaccgggtggaaactagcagtg\t+\t1\tmmmmmmmmmmmmmmmmmmmmmMm\n+seq_378219_x1\t22\t1\t22\taccgggtgtaaatcagcttgtc\tchrII:11534525-11540624\t22\t3537\t3558\taccgggtgtaaatcagcttggc\t+\t1\tmmmmmmmmmmmmmmmmmmmmMm\n+seq_378228_x1\t22\t1\t22\tagctgatttcgtcttggtaata\tchrII:11534525-11540624\t22\t3495\t3516\tagctgatttcgtcttggtaata\t+\t0\tmmmmmmmmmmmmmmmmmmmmmm\n+seq_378230_x1\t23\t1\t23\tatcaccgggtgaacacttgcggt\tchrII:11534525-11540624\t23\t3284\t3306\tatcaccgggtgaacacttgcagt\t+\t1\tmmmmmmmmmmmmmmmmmmmmMmm\n+seq_378231_x1\t20\t1\t20\ttcaccgggtgtacatcaggt\tchrII:11534525-11540624\t20\t3631\t3650\ttcaccgggtgtacatcagct\t+\t1\tmmmmmmmmmmmmmmmmmmMm\n+seq_378239_x1\t22\t1\t22\tgaaaaggggagcagaacgaaaa\tchrII:11534525-11540624\t22\t23\t44\tgaaaaggggagcagaacgaaaa\t+\t0\tmmmmmmmmmmmmmmmmmmmmmm\n+seq_378245_x1\t22\t1\t22\taagagaatagaaccgacgatat\tchrII:11534525-11540624\t22\t4274\t4295\taagagaatagaaccgacgatat\t+\t0\tmmmmmmmmmmmmmmmmmmmmmm\n+seq_378246_x1\t25\t1\t25\ttcaccgggtgaacacttgcagtgtt\tchrII:11534525-11540624\t25\t3285\t3309\ttcaccgggtgaacacttgcagtggt\t+\t1\tmmmmmmmmmmmmmmmmmmmmmmmMm\n+seq_378247_x1\t20\t1\t20\taccgggtggaaactagcaat\tchrII:11534525-11540624\t20\t3062\t3081\taccgggtggaaactagcagt\t+\t1\tmmmmmmmmmmmmmmmmmmMm\n+seq_378249_x1\t22\t1\t22\ttcaccgggtgaaaaatcatcta\tchrII:11534525-11540624\t22\t3760\t3781\ttcaccgggtgaaaaatcaccta\t+\t1\tmmmmmmmmmmmmmmmmmmMmmm\n+seq_378250_x1\t23\t1\t23\tcaccgggtgaacacttgcagttg\tchrII:11534525-11540624\t23\t3286\t3308\tcaccgggtgaacacttgcagtgg\t+\t1\tmmmmmmmmmmmmmmmmmmmm
mMm\n+seq_378261_x1\t25\t1\t25\ttcaccgggtgaacacttgcagtggt\tchrII:11534525-11540624\t25\t3285\t3309\ttcaccgggtgaacacttgcagtggt\t+\t0\tmmmmmmmmmmmmmmmmmmmmmmmmm\n+seq_378267_x1\t19\t1\t19\taccgggtgtacatcagcta\tchrII:11534525-11540624\t19\t3633\t3651\taccgggtgtacatcagcta\t+\t0\tmmmmmmmmmmmmmmmmmmm\n+seq_378273_x1\t20\t1\t20\ttgtgggtgtccgttgcggtg\tchrII:11534525-11540624\t20\t3246\t3265\ttgtgggtgtccgttgcggtg\t+\t0\tmmmmmmmmmmmmmmmmmmmm\n+seq_378276_x1\t24\t1\t24\ttcaccgggtgaacacttgcagttg\tchrII:11534525-11540624\t24\t3285\t3308\ttcaccgggtgaacacttgcagtgg\t+\t1\tmmmmmmmmmmmmmmmmmmmmmmMm\n+seq_378278_x1\t22\t1\t22\tatcaccgggtgtacatcaccta\tchrII:11534525-11540624\t22\t3630\t3651\tatcaccgggtgtacatcagcta\t+\t1\tmmmmmmmmmmmmmmmmmmMmmm\n+seq_378285_x1\t22\t1\t22\taacgaaaaggggatctggcact\tchrII:11534525-11540624\t22\t112\t133\taacgaaaaggggatctggcact\t+\t0\tmmmmmmmmmmmmmmmmmmmmmm\n+seq_378290_x1\t22\t1\t22\tagctgatttcgtcttggtagta\tchrII:11534525-11540624\t22\t3495\t3516\tagctgatttcgtcttggtaata\t+\t1\tmmmmmmmmmmmmmmmmmmmMmm\n+seq_378291_x1\t24\t1\t24\ttcaccgggtgtacatcagctaagg\tchrII:11534525-11540624\t24\t3631\t3654\ttcaccgggtgtacatcagctaagg\t+\t0\tmmmmmmmmmmmmmmmmmmmmmmmm\n+seq_378303_x1\t22\t1\t22\tcaccgggtgaacacttgcagtg\tchrII:11534525-11540624\t22\t3286\t3307\tcaccgggtgaacacttgcagtg\t+\t0\tmmmmmmmmmmmmmmmmmmmmmm\n+seq_378310_x1\t20\t1\t20\ttcaccgggtgtacatcagct\tchrII:11534525-11540624\t20\t3631\t3650\ttcaccgggtgtacatcagct\t+\t0\tmmmmmmmmmmmmmmmmmmmm\n+seq_378311_x1\t18\t1\t18\tccgggtgtacatcagcta\tchrII:11534525-11540624\t18\t3634\t3651\tccgggtgtacatcagcta\t+\t0\tmmmmmmmmmmmmmmmmmm\n+seq_378312_x1\t25\t1\t25\tagtgtgggtgtccgttgcggtgcta\tchrII:11534525-11540624\t25\t3244\t3268\tagtgtgggtgtccgttgcggtgcta\t+\t0\tmmmmmmmmmmmmmmmmmmmmmmmmm\n+seq_378315_x1\t23\t1\t23\ttgtgggtgtccgttgcggcgcta\tchrII:11534525-11540624\t23\t3246\t3268\ttgtgggtgtccgttgcggtgcta\t+\t1\tmmmmmmmmmmmmmmmmmmMmmmm\n+seq_378319_x1\t23\t1\t23\ttcaccgggtggaaactagnagtg\tchrII:11534525-11540624\t23\t3060\t3082\ttcaccg
ggtggaaactagcagtg\t+\t1\tmmmmmmmmmmmmmmmmmmMmmmm\n+seq_378320_x1\t22\t1\t22\tatcaccgggtgaaaaatcacat\tchrII:11534525-11540624\t22\t3759\t3780\tatcaccgggtgaaaaatcacct\t+\t1\tmmmmmmmmmmmmmmmmmmmmMm\n+seq_378322_x1\t23\t1\t23\ttgtgggtgtccgttgcggtgata\tchrII:11534525-11540624\t23\t3246\t3268\ttgtgggtgtccgttgcggtgcta\t+\t1\tmmmmmmmmmmmmmmmmmmmmMmm\n+seq_378323_x1\t23\t1\t23\tatcaccgggtggaaactagaagt\tchrII:11534525-11540624\t23\t3059\t3081\tatcaccgggtggaaactagcagt\t+\t1\tmmmmmmmmmmmmmmmmmmmMmmm\n+seq_378327_x1\t20\t1\t20\ttcaccgggtgaacacttgaa\tchrII:11534525-11540624\t20\t3285\t3304\ttcaccgggtgaacacttgca\t+\t1\tmmmmmmmmmmmmmmmmmmMm\n+seq_378329_x1\t23\t1\t23\tccgggtggaaactagcagtggct\tchrII:11534525-11540624\t23\t3063\t3085\tccgggtggaaactagcagtggct\t+\t0\tmmmmmmmmmmmmmmmmmmmmmmm\n'
b
diff -r 000000000000 -r eaac585f172a test-data/result.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/result.bed Tue Jan 27 09:06:24 2015 -0500
b
@@ -0,0 +1,8 @@
+chrII:11534525-11540624 3020 3081 novel:chrII:11534525-11540624_7 102170.8 + 3020 3081 255,0,0
+chrII:11534525-11540624 3245 3306 known:chrII:11534525-11540624_11 61011.7 + 3245 3306 255,0,0
+chrII:11534525-11540624 3590 3652 known:chrII:11534525-11540624_17 17007.3 + 3590 3652 255,0,0
+chrII:11534525-11540624 3123 3186 known:chrII:11534525-11540624_9 7482.8 + 3123 3186 255,0,0
+chrII:11534525-11540624 3494 3556 known:chrII:11534525-11540624_15 1978.6 + 3494 3556 255,0,0
+chrII:11534525-11540624 3715 3781 known:chrII:11534525-11540624_19 84.4 + 3715 3781 255,0,0
+chrII:11534525-11540624 3340 3403 known:chrII:11534525-11540624_13 5.5 + 3340 3403 255,0,0
+chrII:11534525-11540624 3284 3362 known:chrII:11534525-11540624_12 -0.2 + 3284 3362 255,0,0
b
diff -r 000000000000 -r eaac585f172a test-data/result.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/result.csv Tue Jan 27 09:06:24 2015 -0500
b
@@ -0,0 +1,44 @@
+miRDeep2 score novel miRNAs reported by miRDeep2 novel miRNAs, estimated false positives novel miRNAs, estimated true positives known miRNAs in species known miRNAs in data known miRNAs detected by miRDeep2 estimated signal-to-noise excision gearing
+10 1 0 +/- 0 1 +/- 0 (83 +/- 38%) 7 6 5 (83%) 6.7 1
+9 1 0 +/- 0 1 +/- 0 (83 +/- 38%) 7 6 5 (83%) 6.7 1
+8 1 0 +/- 0 1 +/- 0 (83 +/- 38%) 7 6 5 (83%) 6.7 1
+7 1 0 +/- 0 1 +/- 0 (83 +/- 38%) 7 6 5 (83%) 6.7 1
+6 1 0 +/- 0 1 +/- 0 (83 +/- 38%) 7 6 5 (83%) 6.7 1
+5 1 0 +/- 0 1 +/- 0 (71 +/- 46%) 7 6 6 (100%) 4.5 1
+4 1 0 +/- 0 1 +/- 0 (71 +/- 46%) 7 6 6 (100%) 4.5 1
+3 1 0 +/- 0 1 +/- 0 (71 +/- 46%) 7 6 6 (100%) 4.5 1
+2 1 0 +/- 0 1 +/- 0 (71 +/- 46%) 7 6 6 (100%) 4.5 1
+1 1 0 +/- 0 1 +/- 0 (68 +/- 47%) 7 6 6 (100%) 4 1
+0 1 1 +/- 1 1 +/- 1 (52 +/- 50%) 7 6 6 (100%) 2.2 1
+-1 1 1 +/- 1 1 +/- 1 (52 +/- 50%) 7 6 6 (100%) 2.5 1
+-2 1 1 +/- 1 0 +/- 0 (15 +/- 36%) 7 6 6 (100%) 1.9 1
+-3 1 1 +/- 1 0 +/- 0 (15 +/- 36%) 7 6 6 (100%) 1.9 1
+-4 1 1 +/- 1 0 +/- 0 (13 +/- 34%) 7 6 6 (100%) 1.8 1
+-5 1 2 +/- 1 0 +/- 0 (6 +/- 24%) 7 6 6 (100%) 1.7 1
+-6 1 3 +/- 1 0 +/- 0 (0 +/- 0%) 7 6 6 (100%) 1.5 1
+-7 1 3 +/- 1 0 +/- 0 (0 +/- 0%) 7 6 6 (100%) 1.5 1
+-8 1 3 +/- 1 0 +/- 0 (0 +/- 0%) 7 6 6 (100%) 1.5 1
+-9 1 3 +/- 1 0 +/- 0 (0 +/- 0%) 7 6 6 (100%) 1.5 1
+-10 1 3 +/- 1 0 +/- 0 (0 +/- 0%) 7 6 6 (100%) 1.5 1
+
+
+
+novel miRNAs predicted by miRDeep2
+provisional id miRDeep2 score estimated probability that the miRNA candidate is a true positive rfam alert total read count mature read count loop read count star read count significant randfold p-value miRBase miRNA example miRBase miRNA with the same seed UCSC browser NCBI blastn consensus mature sequence consensus star sequence consensus precursor sequence precursor coordinate
+chrII:11534525-11540624_7 102170.8 83 +/- 38% - 200394 200381 0 13 yes - cbr-miR-35 - - ucaccggguggaaacuagcagu ugcugguuucuuccacaguggua ugcugguuucuuccacagugguacuuuccauuagaacuaucaccggguggaaacuagcagu chrII:11534525-11540624:3020..3081:+
+
+
+
+mature miRBase miRNAs detected by miRDeep2
+tag id miRDeep2 score estimated probability that the miRNA is a true positive rfam alert total read count mature read count loop read count star read count significant randfold p-value mature miRBase miRNA example miRBase miRNA with the same seed UCSC browser NCBI blastn consensus mature sequence consensus star sequence consensus precursor sequence precursor coordinate
+chrII:11534525-11540624_11 61011.7 83 +/- 38% - 119663 119545 0 118 yes cel-miR-37 cbr-miR-35 - - ucaccgggugaacacuugcagu uguggguguccguugcggugcua uguggguguccguugcggugcuacauucucuaaucuguaucaccgggugaacacuugcagu chrII:11534525-11540624:3245..3306:+
+chrII:11534525-11540624_17 17007.3 83 +/- 38% - 33350 33300 0 50 yes cel-miR-40 cbr-miR-35 - - ucaccggguguacaucagcuaa aguggauguaugccaugaugaua aguggauguaugccaugaugauaagauaucagaaauccuaucaccggguguacaucagcuaa chrII:11534525-11540624:3590..3652:+
+chrII:11534525-11540624_9 7482.8 83 +/- 38% - 14668 14617 0 51 yes cel-miR-36 cbr-miR-35 - - ucaccgggugaaaauucgcaug cgccaauuuucgcuucagugcua cgccaauuuucgcuucagugcuagaccauccaaagugucuaucaccgggugaaaauucgcaug chrII:11534525-11540624:3123..3186:+
+chrII:11534525-11540624_15 1978.6 83 +/- 38% - 3872 3014 1 857 yes cel-miR-39 cbr-miR-35 - - ucaccggguguaaaucagcuug agcugauuucgucuugguaaua agcugauuucgucuugguaauaagcucgucauugagauuaucaccggguguaaaucagcuug chrII:11534525-11540624:3494..3556:+
+chrII:11534525-11540624_19 84.4 83 +/- 38% - 164 68 9 87 yes cel-miR-41 - - - ggugguuuuucucugcagugaua ucaccgggugaaaaaucaccua ggugguuuuucucugcagugauagauacuucuaacaacucgcuaucaccgggugaaaaaucaccua chrII:11534525-11540624:3715..3781:+
+chrII:11534525-11540624_13 5.5 71 +/- 46% - 2140 2132 8 0 yes cel-miR-38 cbr-miR-35 - - ucaccgggagaaaaacuggagu uccgguuuuuuccguggugaua uccgguuuuuuccguggugauaacgcauccaaaagucucuaucaccgggagaaaaacuggagu chrII:11534525-11540624:3340..3403:+
+chrII:11534525-11540624_12 -0.2 52 +/- 50% - 119546 119545 1 0 no cel-miR-37 cbr-miR-35 - - ucaccgggugaacacuugcagu uguuccgguuuuuuccguggugaua ucaccgggugaacacuugcagugguccucgugguuucucugugagccagguccuguuccgguuuuuuccguggugaua chrII:11534525-11540624:3284..3362:+
+
+#miRBase miRNAs not detected by miRDeep2
+miRBase precursor id total read count mature read count(s) star read count remaining reads UCSC browser NCBI blastn miRBase mature sequence(s) miRBase star sequence(s) miRBase precursor sequence
+4000 4000 0 0 - - aaugacacugguuaucuuuuccaucg - cgccggcaaugacacugguuaucuuuuccaucguggaaugccccccauugauuuuuuccccuuuucggggggaaaaaauuggaaacgagaaagguaucgggugucauagccggcg
b
diff -r 000000000000 -r eaac585f172a test-data/survey.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/survey.csv Tue Jan 27 09:06:24 2015 -0500
b
@@ -0,0 +1,22 @@
+miRDeep2 score novel miRNAs reported by miRDeep2 novel miRNAs, estimated false positives novel miRNAs, estimated true positives known miRNAs in species known miRNAs in data known miRNAs detected by miRDeep2 estimated signal-to-noise excision gearing
+10 1 0 +/- 0 1 +/- 0 (83 +/- 38%) 7 6 5 (83%) 6.7 1
+9 1 0 +/- 0 1 +/- 0 (83 +/- 38%) 7 6 5 (83%) 6.7 1
+8 1 0 +/- 0 1 +/- 0 (83 +/- 38%) 7 6 5 (83%) 6.7 1
+7 1 0 +/- 0 1 +/- 0 (83 +/- 38%) 7 6 5 (83%) 6.7 1
+6 1 0 +/- 0 1 +/- 0 (83 +/- 38%) 7 6 5 (83%) 6.7 1
+5 1 0 +/- 0 1 +/- 0 (71 +/- 46%) 7 6 6 (100%) 4.5 1
+4 1 0 +/- 0 1 +/- 0 (71 +/- 46%) 7 6 6 (100%) 4.5 1
+3 1 0 +/- 0 1 +/- 0 (71 +/- 46%) 7 6 6 (100%) 4.5 1
+2 1 0 +/- 0 1 +/- 0 (71 +/- 46%) 7 6 6 (100%) 4.5 1
+1 1 0 +/- 0 1 +/- 0 (68 +/- 47%) 7 6 6 (100%) 4 1
+0 1 1 +/- 1 1 +/- 1 (52 +/- 50%) 7 6 6 (100%) 2.2 1
+-1 1 1 +/- 1 1 +/- 1 (52 +/- 50%) 7 6 6 (100%) 2.5 1
+-2 1 1 +/- 1 0 +/- 0 (15 +/- 36%) 7 6 6 (100%) 1.9 1
+-3 1 1 +/- 1 0 +/- 0 (15 +/- 36%) 7 6 6 (100%) 1.9 1
+-4 1 1 +/- 1 0 +/- 0 (13 +/- 34%) 7 6 6 (100%) 1.8 1
+-5 1 2 +/- 1 0 +/- 0 (6 +/- 24%) 7 6 6 (100%) 1.7 1
+-6 1 3 +/- 1 0 +/- 0 (0 +/- 0%) 7 6 6 (100%) 1.5 1
+-7 1 3 +/- 1 0 +/- 0 (0 +/- 0%) 7 6 6 (100%) 1.5 1
+-8 1 3 +/- 1 0 +/- 0 (0 +/- 0%) 7 6 6 (100%) 1.5 1
+-9 1 3 +/- 1 0 +/- 0 (0 +/- 0%) 7 6 6 (100%) 1.5 1
+-10 1 3 +/- 1 0 +/- 0 (0 +/- 0%) 7 6 6 (100%) 1.5 1
b
diff -r 000000000000 -r eaac585f172a tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Tue Jan 27 09:06:24 2015 -0500
b
@@ -0,0 +1,40 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="perl" version="5.18.1">
+        <repository changeset_revision="114b6af405fa" name="package_perl_5_18" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="bowtie" version="0.12.7">
+        <repository changeset_revision="9f9f38617a98" name="package_bowtie_0_12_7" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="vienna_rna" version="1.8.5">
+        <repository changeset_revision="54e961ee33d4" name="package_vienna_rna_1_8" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="pdf_api2" version="2.023">
+        <repository changeset_revision="b9b7fadfdb69" name="package_perl_pdf_api2_2_023" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="randfold" version="2.0">
+        <repository changeset_revision="b34d2c942c79" name="package_randfold_2_0" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="mirdeep2_quantifier" version="2.0">
+        <repository changeset_revision="2c039fc73f1f" name="mirdeep2_quantifier" owner="rnateam" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="mirdeep2" version="2.0">
+        <install version="1.0">
+            <actions>
+                <action type="download_by_url">https://raw.githubusercontent.com/bgruening/download_store/master/miRDeep2/miRDeep2.tar.gz</action>
+                <action type="move_directory_files">
+                    <source_directory>.</source_directory>
+                    <destination_directory>$INSTALL_DIR</destination_directory>
+                </action>
+                <action type="set_environment">
+                    <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR</environment_variable>
+                </action>
+            </actions>
+        </install>
+        <readme>
+
+MiRDeep2 is a software package for identification of novel and known miRNAs in deep sequencing data. Furthermore, it can be used for miRNA expression profiling across samples.
+
+        </readme>
+    </package>
+</tool_dependency>