Repository 'mira4_assembler'
hg clone https://toolshed.g2.bx.psu.edu/repos/peterjc/mira4_assembler

Changeset 0:6a88b42ce6b9 (2014-11-21)
Next changeset 1:70248e6e3efc (2015-08-05)
Commit message:
Uploaded v0.0.4, previously only on the TestToolShed
added:
test-data/U13small_m.fastq
test-data/U13small_m.mira4_de_novo.fasta
test-data/ecoli.fastq
test-data/ecoli.mira4_de_novo.fasta
test-data/empty_file.dat
test-data/header.mira
test-data/tvc_bait.fasta
test-data/tvc_contigs.fasta
test-data/tvc_map_ref_strain.fasta
test-data/tvc_map_same_strain.fasta
test-data/tvc_mini.fastq
test-data/tvc_mini_bait_neg.fastq
test-data/tvc_mini_bait_pos.fastq
test-data/tvc_mini_bait_strict.fastq
tools/mira4/README.rst
tools/mira4/mira4.py
tools/mira4/mira4_bait.py
tools/mira4/mira4_bait.xml
tools/mira4/mira4_convert.py
tools/mira4/mira4_convert.xml
tools/mira4/mira4_de_novo.xml
tools/mira4/mira4_make_bam.py
tools/mira4/mira4_mapping.xml
tools/mira4/mira4_validator.py
tools/mira4/repository_dependencies.xml
tools/mira4/tool_dependencies.xml
b
diff -r 000000000000 -r 6a88b42ce6b9 test-data/U13small_m.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/U13small_m.fastq Fri Nov 21 06:42:56 2014 -0500
b
b"@@ -0,0 +1,104 @@\n+@122533a.t3\n+ACGCGTTATTAAAGACTTTTTTTTTGGTTTTTTTCAAGTTCAAGTATTCTTTATTCAAAGTTGAAACATGTACCATACTGCATTATTGCAAAAATTCACTGGTACAAAACACTTTGCAGCTGGTGAGAAGGCAATAAAAAGTTGATTTTTAAACTCATTACTATAAATTATTCTTACAGTACTTTGCAAATTCAGAATTTCAAACTGCATGTTCTTTTTCTAAATTGCCCACAGTACTCGAGGTTCCTGAAGCTAAGGCAGCTGTTTCATAGGAGGGGGAAGAAGTATCAAATCTCTTGGGATTTCCATTTCTCTTTCCATGCCGACATACTTCAGGGCATCTTCCTGACTGTCTCTTTTTTCGGGAAACTTCTCTTCGCCAATCTGGATGGCTCTCTCGGCAAAAATACCTATCCTGTGATCACCGTTAACCATCATAACTTTTGCATACCACTTTGGATTTACCACCGATATGCCACAACGATTTTTGTTACCCTTGCAGGTTGCACCACCCAAAATCGTTGTTCTTTTGCCCTTGCCAAATTCTGGGCCCCAAGGGCCAATTCCCCAAAATTAATCCTTTTAATTTCGTGGGCCGCGTTCCAGCTGTCCCCCTTGTTGAAGGTTTTTTTTCGCCCCCCGGCTCTCCCCCCCCCTTTTTAAACCCCGTAAACCATTATAGTTGGTTAAACCCCCGGGGGGTTCCCCCTTGTAAATTTTACCCCCCCCTCTCCCCCATTTAAAAATATGTGCCCTTTTTCTCCCCCCCACTGGGCCCGGGGTTTTCTCCTATTCCGGGGAAAGAAAAACCCTGTTTTCTCTTCGCCAAAATACTCCCCCCCCCCTTTTTATAAAAAAAAAAAAAACTTGGGGGCCCCCACCCCCCCCCGGGGGGGAAAAAAAAAAGCGCGGGGTTTTTGCGCGTTTTTTTTGGGGGGGGGGGGGTTTCTTTTCCCCCCCTTTTTTCCTTCCCCCCCAAAGGAGAAAAAATCTTTTTTGTTGGGGCGCCTCCCGGCGCCCCTCCCGGGGTTGGCGCGGCGAAAACGCGGTTTTTCCCCCCCCCCCAAAGGGGGGGGCGCAGATACCGCGTTTACTCCCCATAAATACTCCGGGGGGGAATAACATCCCGGGAAAAAAAAATTTTATATCTATATATGGGGCCTCCCAAAAAGGGGCATAAAACACGGAGAAACCGGCGCCCTTGTGTGGGCGTTTTTCTCTCAATGGGCGCCCCCCCCCTCAACAAAAATAAAAA\n++\nn+@122533a.t7\n+GTTCGAGCAATATCCCTACGATCAACTAGAACAATGATTTTGTGGTGGATGCAACCCGCAAGGGTAACAAAATTCGTTTTGCAAATCATTCGGTAAATCCAAACTGCTATGCAAAAGTTATGATGGTTAACGGTGATCACAGGATAGGTATTTTTGCCAAGAGAGCCATCCAGACTGGCGAAGAGCTGTTTTTTGATTACAGATACAGCCAGGCTGATGCCCTGAAGTATGTCGGCATCGAAAGAGAAATGGAAATCCCTTGACATCTGCTACCTCCTCCCCCTCCTCTGAAACAGCTGCCTTAGCTTCAGGAACCTCGAGTACTGTGGGCAATTTAAAAAAAGAACATGCAGTTTGAAATTCTGAATTTGCAAAGTACTGTAAGAATAATTTATAGTAATGAGTTAAAAATCAACTTTTATTGCCTTCCACCACTGCAAGTGTTTGTACCGTGAATTTTGCAATAATGCCATATGGTACATTTTCCACTTTGAATAAAGAATACTTGAGCTTGAAAAAAAAAAAAAAAAAAGATTCTTTATTTAAGCGGCCGCGAGCTTTTCCCTTTAATAGGGTTAATTTAACTXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\n++\n+&&&,1/?/,/(+)+'('+)(1.+*+,0**1-1574=====EBAABA=:=56;6879=>7-025<???@BA>>>;?ACBA?A?AAAEBEA?ABAA=FF@@??AA?@AA?A=@=A<<ACAAAD?==>@==@=@E=@@A@=A@;2:22-&623@DFBAABB=59;/888B?B?<72;;:@==84.)1,9AAEEFECB???;B?<>??;BCBAAAAAA?==>=@DA??;7.<8<524/37AA??><.,1.87====AEB@AAA===869BA@?=@=?AAAABBABBABAAAAAC==A??ABBBEEDAEEBA97:053064:A7;)2,,2.63:5==>=@:::A@;A=7A==A?AAA?A=9====??=D====B@@=:E=A@AAA?;4:6=64==?AABAB05**33/,116>E@==9=:=:?@=:A=466,-)*..--/1230627:3666;.,*(1,996E<6;5@=8=@8-(/)"..b"AAAACAATAGAAATTAGTTATCCTGTCACGTGGGTTAAATGCAGCTGTTAATAAAATAGGCAAAGTAGTTGTCAAGAGTTAATCAGATACACTTCTGCAAGGATGACTAGTTTTATAAAATAAAATAATGGCATCAAAATGAATACCAGTTGAGTAATGTATTACTTTAAAATAACTGTGGGATCTTAAGTCATTTTATAAACACACTAGACCCATCAGACTCAATTTTTTTTTTCAATTTCAAGTATTCTTTATTCCAAGTTGAAAAATATCCATACTGCATTATTGCAAAAATTCACTGGTACAAAACACTTTGCAGCCGGGTGAGAAGGCCATAAAAAATTGAGTGTTTAAACTCATTGCTATAAATTATTCTTATAGTTCTCTTGCCAATTTCGAAATTTCAAACGGGCCTTTTCTTTTTTCTAAATTGCCCACCGTTTCCCGAAGGTTCCTGAAACCAAACTGAACGGAAGAAGGGAAAGTGTGTGAATAACTC\n++\n+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!=9=;49.-,,66,2(*18=56--1A;CEC?4787<?A;;;A:=BBAA=@@992;A>@D?EAEB894+75??BD?AAAAA??@=B=@?A??B???ABBEAAAABA@@@?CCFBBBAC@A>???>;AA@ACA?;8817AA?=;8;58?DDBAAAC???>CC93.*9.8CAAABAEDA@>D????A@??5:94>?:@BAA=??A?CDA@CCAB@AADC??97176/6;1?ADABAAAAAADAAAAEAAAAAAAAAAAAD??@??@DDAAA??8===?AAAAAAAADAAABABBBCAADAADDFAD@?CADDD?=A<5;2686==AAAC@AA@DDFCAFD??;;:8AA=>@:=9:==>>=88:+363?=;?@@EDFAAAAA?AA?AA??AAA===C?;<?@??=844.97?A@?:9A97B;;=>>AD9::646+6>2>806669>@??;8@???@=<<;@;E@D@AABAD>=>>=@===AAAEFFAAAAAAA??59?=@?20)),.7<@DC>==9=>A@;6532748?@@;<5<==A;==8==61**0-12173>@<@=A@2935<EBAA??AA??B>81..2988:61*25:<B@CCAADDEAAA??B?AA61-+34/836ADAA@98/8:55-397BD:1+,.2,.**/+--0?<8-,2*
-24=;;5?>/5+,*24.,.+(+))*(-*(,))-)(-*(9,,,,(*.6@AA>-/-/((),+23;67>61)()(*()****,5:>A8;-,-*())))+))),,-,*+*,%15*43,+)))*(-3+,,65,*++,/-+((+,.36-4,+)+((0,())+&3*.)*++**)/4(5/(3..*)-/0453315702**-*14*-+/+(+.*((++\n+@U13e07f02.t2\n+ACATGCTGCAGTCGATCTAGAGGATCCCATGTCAAGGGATTTCCATTTCTCTTTCAATGCCAACATACTTCAGGGCATCAGCCTGGCTGTATCTGTAATCAAAAAACAGTTCTTCGCAAGTCTGGATGGCTCTCCTGACAAAAACACCTATCCTGTGATCACCATTAACCATCATAACTTTTGCATAGCAGTTTGGATTTACCGAATGATTTGCAAAACGAATTTTGTTACCCTTGCAGGTTGCATCCACCACAAAATCATTGTTCAAGTTGAACAGAAAGCTGCACATGTATTTATCACACACTTTCCCTCTTCCGTCAGCTTAGCTTCAGGAACCTCGAGTACTGTGGGCAATTTAAAAAAAGAAAATGCCGTTTGAAATTCTGAATTTGCAAAGTACTATAAGAATAATTTATAGCAATGAGTTTAAAACTCGACTTTTTATTGCCTTCTCACCAGCTGCAAAGTGTTTTGTACCAGTGAATTTTTGCAACAATGCAGTATGGTTTATTTTTCAACTTTGAATAAAGAATACTTGTAAATTGAAAAAAAAAATTGGAGTTCTGGATGGGTCTACGTGGTGTTTATAAAATAACCTAACGCATCGCCCCATCCAGCTTTTTTTCTGAGAGAGTACAGATACCAGCCACTCCCGCGGGGTTGTTTCGATCTTGCAATGCCCCGCCAACTTTTTAATTTTTAGCAGAAAACCCCAGGTTCTAGCTCCCATTGACCAGAAAAATTGTTTATCCTCGGAAATACAAACGCCCTGTTGGAACCACAACTCATGCCTCTCTGCGCCCCTCAAGTTCGTGTAAATGTGAAAACCAGCGCCCCGCCCCGAGTTTTAGAAACGCCCGCCCCGCCTTGTGGAAGCCTTAGGGGAAGTATAACGCCCAGCGACGTTCTGCTCTCCTGGACAGCGTCGGCGTATGGTATCTGACGGTCTAAACATACGTCACGTAACGGGAGCCCCCCTCTTCCGCATCTCTCCCCACTATTTGTGGCTAGCAGAAGAGAAATAATAAGCCGCAGCGTATCTGATGAGGAGCAGACGTTATCACGCTCTGCGGTGGCGGTCTTTAGAACACAAAATTTCGCTGTTGTCGCGCCCTTCTCCGACGGAAAAGAGAAAAAACCCTCTCTGTCTTTTCTATTAAAATAAGTAATTGGTGGGTTTTTTCTGCTGCCTTCTGACGACGACCGAAGAGAAACGCGGGTTTTGTATAACGCAGATATCACATCGCGCGCCCTCCGCGTCCG\n++\nn"
b
diff -r 000000000000 -r 6a88b42ce6b9 test-data/U13small_m.mira4_de_novo.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/U13small_m.mira4_de_novo.fasta Fri Nov 21 06:42:56 2014 -0500
b
@@ -0,0 +1,47 @@
+>MIRA_rep_c1
+aaatttcataaaatttctcatttcaaaaacgagaaaattaaggccttgcggggttaaatc
+gttatcaagtaatcagtcggctcactgcccagtgtggacactggccaatattatggcacc
+agcctttgagaaaaggaaacctttttttgaatccatctccctgacctggggtttgggttg
+ggttttatcagcatagggtgacaggcaagttcaatctgattggatcctggatcccatcat
+ggggtatctgttcctaaatcattccctgatcctcggtcagagcgagcgcttaggttcccg
+ctgcagttaaacacctggttcatctgggcatactaaggttatgactttccacctgggagt
+ccacgacaactgaaaaaactcaaaacttctttaagtagaagttgagcctgaagctgaggc
+agctgtttcagagaggaggaggaggaggagatagcaaatgtcaagggatttccatttctc
+tttcaatgccaacatacttcagggcatcagcctggctgtatctgtaatcaaaaaacagtt
+cttcgcaagtctggatggctctcctgacaaaaacacctatcctgtgatcaccattaacca
+tcataacttttgcatagcagtttggatttaccgaatgatttgcaaaacgaattttgttac
+ccttgcaggttgcatccaccacaaaatcattgttcaagttgaacagaaagctgcacatgt
+atttatcacacactttccctcttccgtcagcttagcttcaggaacctcgagtactgtggg
+caatttagaaaaagaaaatgccgtttgaaattctgaatttgcaaagtactataagaataa
+tttatagcaatgagtttaaaactcaactttttattgccttctcaccagctgcaaagtgtt
+ttgtaccagtgaatttttgcaataatgcagtatggtatatttttcaactttgaataaaga
+atacttgaaattgaaaaaaaaaattgagtctgatgggtctagtgtgtttataaaatgact
+taagatcccacagttattttaaagtaatacattactcaactggtattcattttgatgcca
+ttattttattttataaaactagtcatccttgcagaagtgtatctgattaactcttgacaa
+ctactttgcctattttattaacagctgcatttaacccacgtgacaggataactaatttct
+attgtttttgtatttttgcctcctccttccaagatagcagtagagtactgtgttaaatct
+tgcttcagaagagcttgtttaaaatagttataaagaaagtaaaatacatagaaaaacacg
+ttataattttaaattaaaaaagaaaaaagatgtctactctgaataaattatgcatacata
+taaggtctgaacaaaaacatgaaaagatgggctctgagtatcaattttaaaaatctttac
+cagtataacattactcaaacaaaaattaaaatttaatataattaagtacatattccaaga
+caacaaatggaccttgactgttaattctgg
+>MIRA_rep_c2
+gcaatatcgtgacgatatgtawgataaatacatgtgcagctytctgttcaacttgaacaa
+tgattttgtggtggatgcaacccgcaagggtaacaaaattcgttttgcaaatcattcggt
+aaatccaaactgctatgcaaaagttatgatggttaacggtgatcacaggatakgtatttt
+tgccaagagagccatccagactggcgaaragctgttttttgattacagatacagccaggc
+tgatgccctgaagtatgtcggcatcgaaagagaaatggaaatcccttgacatctgctacc
+tcctccccctcctctgaaacagctgccttagcttcaggaacctcgagtactgtgggcaat
+ttaaaaaaagaacatgcagtttgaaattctgaatttgcaaagtactgtaagaataattta
+tagtaatgagtttaaaaatcaactttttattgccttctcaccagctgcaaagtgttttgt
+wccagtgaattttgcaataatgcagtatggtacattttcactttgaataaagaatacttg
+a
+>MIRA_rep_c3
+gcgttattaaagacttttttttwtttttttttcaagttcaagtattctttattcaaagtt
+gaaaaatgtaccatactgcattattgcaaaaattcactggtacaaaacactttgcagctg
+gtgagaaggcaataaaaagttgatttttaaactcattactataaattattcttacagtac
+tttgcaaattcagaatttcaaactgcatgttctttttctaaattgcccacagtactcgag
+gttcctgaagctaaggcagctgtttcagaggagggggargaagtaycaaatytcwaggga
+tttccatttctctttccatgccgamatacttcagggcatcagcctgactgtmtctgtaat
+caaaaaacakctcttcgccavtctggatggctctctyggcaaaaatacctatcctgtgat
+caccgttaaccatcataacttttgcatagcattt
b
diff -r 000000000000 -r 6a88b42ce6b9 test-data/ecoli.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ecoli.fastq Fri Nov 21 06:42:56 2014 -0500
b
b"@@ -0,0 +1,20164 @@\n+@frag_1\n+AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTC\n++\n+##%')+.024JMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_1_a\n+GAGACATATTGCCCGTTGCAGTCAGAATGAAAAGCT\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMJ420.+)'%##\n+@frag_2\n+AGAGACATATTGCCCGTTGCAGTCAGAATGAAAAGC\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMJ420.+)'%#\n+@frag_3\n+CTTTTCATTCTGACTGCAACGGGCAATATGTCTCTG\n++\n+%')+.024JMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_4\n+ACAGAGACATATTGCCCGTTGCAGTCAGAATGAAAA\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMJ420.+)'\n+@frag_5\n+TTTCATTCTGACTGCAACGGGCAATATGTCTCTGTG\n++\n+)+.024JMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_6\n+ACACAGAGACATATTGCCCGTTGCAGTCAGAATGAA\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMJ420.+\n+@frag_7\n+TCATTCTGACTGCAACGGGCAATATGTCTCTGTGTG\n++\n+.024JMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_8\n+CCACACAGAGACATATTGCCCGTTGCAGTCAGAATG\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMJ420\n+@frag_9\n+ATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGA\n++\n+24JMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_10\n+ATCCACACAGAGACATATTGCCCGTTGCAGTCAGAA\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMJ4\n+@frag_11\n+TCTGACTGCAACGGGCAATATGTCTCTGTGTGGATT\n++\n+JMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_12\n+TAATCCACACAGAGACATATTGCCCGTTGCAGTCAG\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_13\n+TGACTGCAACGGGCAATATGTCTCTGTGTGGATTAA\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_14\n+TTTAATCCACACAGAGACATATTGCCCGTTGCAGTC\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_15\n+ACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAA\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_16\n+TTTTTAATCCACACAGAGACATATTGCCCGTTGCAG\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_17\n+TGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAA\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_18\n+TTTTTTTAATCCACACAGAGACATATTGCCCGTTGC\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_19\n+CAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAG\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_20\n+TCTTTTTTTAATCCACACAGAGACATATTGCCCGTT\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_21\n+ACGGGCAATATGTCTCTGTG
TGGATTAAAAAAAGAG\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_22\n+ACTCTTTTTTTAATCCACACAGAGACATATTGCCCG\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_23\n+GGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTG\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_24\n+ACACTCTTTTTTTAATCCACACAGAGACATATTGCC\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_25\n+GCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTC\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_26\n+AGACACTCTTTTTTTAATCCACACAGAGACATATTG\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_27\n+AATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTG\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_28\n+TCAGACACTCTTTTTTTAATCCACACAGAGACATAT\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_29\n+TATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGAT\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_30\n+TATCAGACACTCTTTTTTTAATCCACACAGAGACAT\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_31\n+TGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAG\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_32\n+GCTATCAGACACTCTTTTTTTAATCCACACAGAGAC\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_33\n+TCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCA\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_34\n+CTGCTATCAGACACTCTTTTTTTAATCCACACAGAG\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_35\n+TCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_36\n+AGCTGCTATCAGACACTCTTTTTTTAATCCACACAG\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_37\n+TGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGCTT\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_38\n+GAAGCTGCTATCAGACACTCTTTTTTTAATCCACAC\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_39\n+TGTGGATTAAAAAAAGAGTGTCTGATAGCAGCTTCT\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_40\n+CAGAAGCTGCTATCAGACACTCTTTTTTTAATCCAC\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_41\n+TGGATTAAAAAAAGAGTGTCTGATAGCAGCTTCTGA\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_42\n+TTCAGAAGCTGCTATCAGACACTCTTTTTTTAATCC\n++\n+MMMMMMMMMMMMMMMMMMMMMMMM
MMMMMMMMMMMM\n+@frag_43\n+GATTAAAAAAAGAGTGTCTGATAGCAGCTTCTGAAC\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_44\n+AGTTCAGAAGCTGCTATCAGACACTCTTTTTTTAAT\n++\n+MMMMMMMMMMMMMMMMMMM"..b"4997\n+AATTGATGATGAATCATCAGTAAAATCTATTCATTA\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_4998\n+ATAATGAATAGATTTTACTGATGATTCATCATCAAT\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_4999\n+TTGATGATGAATCATCAGTAAAATCTATTCATTATC\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_5000\n+AGATAATGAATAGATTTTACTGATGATTCATCATCA\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_5001\n+GATGATGAATCATCAGTAAAATCTATTCATTATCTC\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMK\n+@frag_5002\n+TGAGATAATGAATAGATTTTACTGATGATTCATCAT\n++\n+KKMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_5003\n+TGATGAATCATCAGTAAAATCTATTCATTATCTCAA\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMKKK\n+@frag_5004\n+ATTGAGATAATGAATAGATTTTACTGATGATTCATC\n++\n+KKKKMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_5005\n+ATGAATCATCAGTAAAATCTATTCATTATCTCAATA\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMKKKK#\n+@frag_5006\n+CTATTGAGATAATGAATAGATTTTACTGATGATTCA\n++\n+##KKKKMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_5007\n+GAATCATCAGTAAAATCTATTCATTATCTCAATAGC\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMKKKK##%\n+@frag_5008\n+AGCTATTGAGATAATGAATAGATTTTACTGATGATT\n++\n+'%##KKKKMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_5009\n+ATCATCAGTAAAATCTATTCATTATCTCAATAGCTT\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMKKKK##%')\n+@frag_5010\n+AAAGCTATTGAGATAATGAATAGATTTTACTGATGA\n++\n++)'%##KKKKMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_5011\n+CATCAGTAAAATCTATTCATTATCTCAATAGCTTTT\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMKKKK##%')+.\n+@frag_5012\n+GAAAAGCTATTGAGATAATGAATAGATTTTACTGAT\n++\n+0.+)'%##KKKKMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_5013\n+TCAGTAAAATCTATTCATTATCTCAATAGCTTTTCA\n++\n+MMMMMMMMMMMMMMMMMMMMMMMKKKK##%')+.02\n+@frag_5014\n+ATGAAAAGCTATTGAGATAATGAATAGATTTTACTG\n++\n+420.+)'%##KKKKMMMMMMMMMMMMMMMMMMMMMM\n+@frag_5015\n+AGTAAAATCTATTCATTATCTCAATAGCTTTTCATT\n++\n+MMMMMMMMMMMMMMMMMMMMMKKKK##%')+.024J\n+@frag_5016\n+GA
ATGAAAAGCTATTGAGATAATGAATAGATTTTAC\n++\n+MJ420.+)'%##KKKKMMMMMMMMMMMMMMMMMMMM\n+@frag_5017\n+TAAAATCTATTCATTATCTCAATAGCTTTTCATTCT\n++\n+MMMMMMMMMMMMMMMMMMMKKKK##%')+.024JMM\n+@frag_5018\n+CAGAATGAAAAGCTATTGAGATAATGAATAGATTTT\n++\n+MMMJ420.+)'%##KKKKMMMMMMMMMMMMMMMMMM\n+@frag_5019\n+AAATCTATTCATTATCTCAATAGCTTTTCATTCTGA\n++\n+MMMMMMMMMMMMMMMMMKKKK##%')+.024JMMMM\n+@frag_5020\n+GTCAGAATGAAAAGCTATTGAGATAATGAATAGATT\n++\n+MMMMMJ420.+)'%##KKKKMMMMMMMMMMMMMMMM\n+@frag_5021\n+ATCTATTCATTATCTCAATAGCTTTTCATTCTGACT\n++\n+MMMMMMMMMMMMMMMKKKK##%')+.024JMMMMMM\n+@frag_5022\n+CAGTCAGAATGAAAAGCTATTGAGATAATGAATAGA\n++\n+MMMMMMMJ420.+)'%##KKKKMMMMMMMMMMMMMM\n+@frag_5023\n+CTATTCATTATCTCAATAGCTTTTCATTCTGACTGC\n++\n+MMMMMMMMMMMMMKKKK##%')+.024JMMMMMMMM\n+@frag_5024\n+TGCAGTCAGAATGAAAAGCTATTGAGATAATGAATA\n++\n+MMMMMMMMMJ420.+)'%##KKKKMMMMMMMMMMMM\n+@frag_5025\n+ATTCATTATCTCAATAGCTTTTCATTCTGACTGCAA\n++\n+MMMMMMMMMMMKKKK##%')+.024JMMMMMMMMMM\n+@frag_5026\n+GTTGCAGTCAGAATGAAAAGCTATTGAGATAATGAA\n++\n+MMMMMMMMMMMJ420.+)'%##KKKKMMMMMMMMMM\n+@frag_5027\n+TCATTATCTCAATAGCTTTTCATTCTGACTGCAACG\n++\n+MMMMMMMMMKKKK##%')+.024JMMMMMMMMMMMM\n+@frag_5028\n+CCGTTGCAGTCAGAATGAAAAGCTATTGAGATAATG\n++\n+MMMMMMMMMMMMMJ420.+)'%##KKKKMMMMMMMM\n+@frag_5029\n+ATTATCTCAATAGCTTTTCATTCTGACTGCAACGGG\n++\n+MMMMMMMKKKK##%')+.024JMMMMMMMMMMMMMM\n+@frag_5030\n+GCCCGTTGCAGTCAGAATGAAAAGCTATTGAGATAA\n++\n+MMMMMMMMMMMMMMMJ420.+)'%##KKKKMMMMMM\n+@frag_5031\n+TATCTCAATAGCTTTTCATTCTGACTGCAACGGGCA\n++\n+MMMMMKKKK##%')+.024JMMMMMMMMMMMMMMMM\n+@frag_5032\n+TTGCCCGTTGCAGTCAGAATGAAAAGCTATTGAGAT\n++\n+MMMMMMMMMMMMMMMMMJ420.+)'%##KKKKMMMM\n+@frag_5033\n+TCTCAATAGCTTTTCATTCTGACTGCAACGGGCAAT\n++\n+MMMKKKK##%')+.024JMMMMMMMMMMMMMMMMMM\n+@frag_5034\n+TATTGCCCGTTGCAGTCAGAATGAAAAGCTATTGAG\n++\n+MMMMMMMMMMMMMMMMMMMJ420.+)'%##KKKKMM\n+@frag_5035\n+TCAATAGCTTTTCATTCTGACTGCAACGGGCAATAT\n++\n+MKKKK##%')+.024JMMMMMMMMMMMMMMMMMMMM\n+@frag_5036\n+CATATTGCCCGTTGCAGTCAGAATGAAAAGCTATTG\n++\n+MMMMMMMMMMMMMMMMMMMMMJ420.+)'%##KKKK\n+@frag_5037\n+AATAGCT
TTTCATTCTGACTGCAACGGGCAATATGT\n++\n+KKK##%')+.024JMMMMMMMMMMMMMMMMMMMMMM\n+@frag_5038\n+GACATATTGCCCGTTGCAGTCAGAATGAAAAGCTAT\n++\n+MMMMMMMMMMMMMMMMMMMMMMMJ420.+)'%##KK\n+@frag_5039\n+TAGCTTTTCATTCTGACTGCAACGGGCAATATGTCT\n++\n+K##%')+.024JMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_5039_a\n+AGACATATTGCCCGTTGCAGTCAGAATGAAAAGCTA\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMJ420.+)'%##K\n"
b
diff -r 000000000000 -r 6a88b42ce6b9 test-data/ecoli.mira4_de_novo.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ecoli.mira4_de_novo.fasta Fri Nov 21 06:42:56 2014 -0500
b
@@ -0,0 +1,86 @@
+>MIRA_c1
+gccagggctattttaccggcgcagtatcgccgccaggattgcattgcgcacgggcgacat
+ctggcaggcttcattcacgcctgctattcccgtcagcctgagcttgccgcgaagctgatg
+aaagatgttatcgctgaaccctaccgtgaacggttactgccaggcttccggcaggcgcgg
+caggcggtcgcggaaatcggcgcggtagcgagcggtatctccggctccggcccgaccttg
+ttcgctctgtgtgacaagccggaaaccgcccagcgcgttgccgactggttgggtaagaac
+tacctgcaaaatcaggaaggttttgttcatatttgccggctggatacggcgggcgcacga
+gtactggaaaactaaatgaaactctacaatctgaaagatcacaacgagcaggtcagcttt
+gcgcaagccgtaacccaggggttgggcaaaaatcaggggctgttttttccgcacgacctg
+ccggaattcagcctgactgaaattgatgagatgctgaagctggattttgtcacccgcagt
+gcgaagatcctctcggcgtttattggtgatgaaatcccacaggaaatcctggaagagcgc
+gtgcgcgcggcgtttgccttcccggctccggtcgccaatgttgaaagcgatgtcggttgt
+ctggaattgttccacgggccaacgctggcatttaaagatttcggcggtcgctttatggca
+caaatgctgacccatattgcgggtgataagccagtgaccattctgaccgcgacctccggt
+gataccggagcggcagtggctcatgctttctacggtttaccgaatgtgaaagtggttatc
+ctctatccacgaggcaaaatcagtccactgcaagaaaaactgttctgtacattgggcggc
+aatatcgaaactgttgccatcgacggcgatttcgatgcctgtcaggcgctggtgaagcag
+gcgtttgatgatgaagaactgaaagtggcgctagggttaaactcggctaactcgattaac
+atcagccgtttgctggcgcagatttgctactactttgaagctgttgcgcagctgccgcag
+gagacgcgcaaccagctggttgtctcggtgccaagcggaaacttcggcgatttgacggcg
+ggtctgctggcgaagtcactcggtctgccggtgaaacgttttattgctgcgaccaacgtg
+aacgataccgtgccacgtttcctgcacgacggtcagtggtcacccaaagcgactcaggcg
+acgttatccaacgcgatggacgtgagtcagccgaacaactggccgcgtgtggaagagttg
+ttccgccgcaaaatctggcaactgaaagagctgggttatgcagccgtggatgatgaaacc
+acgcaacagacaatgcgtgagttaaaagaactgggctacacttcggagccgcacgctgcc
+gtagcttatcgtgcgctgcgtgatcagttgaatccaggcgaatatggcttgttcctcggc
+accgcgcatccggcgaaatttaaagagagcgtggaagcgattctcggtgaaacgttggat
+ctgccaaaagagctggcagaacgtgctgatttacccttgctttcacataatctgcccgcc
+gattttgctgcgttgcgtaaattgatgatgaatcatcagtaaaatctattcattatctca
+atagcttttcattctgactgcaacgggcaatatgtctctgtgtggattaaaaaaagagtg
+tctgatagcagcttctgaactggttacctgccgtgagtaaattaaaattttattgactta
+ggtcactaaatactttaaccaatataggcatagcgcacagacagataaaaattacagagt
+acacaacatccatgaaacgcattagcaccaccattaccaccaccatcaccattaccacaa
+cggtgcgggctgacgcgtacaggaaacacagaaaaaagcccgcacctgacagtgcgggct
+ttttttttcgaccaaaggtaacgaggtaacaaccatgcgagagttgaagttcggcggtac
+atcagtggcaaatgcagaacgttttctgcgtgttgccgatattctggaaagcaatgccag
+gcaggggcaggtggccaccgtcctctctgcccccgccaaaacaccaaccacctggtggcg
+atgattgaaaaaaccattagcggccaggatgctttacccaatatcagcgatgccgaacgt
+atttttgccgaacttttgacgggactcgccgccgcccagccggggttcccgctggcgacg
+tcaattgaaaactttcgtcgatcaggaatttgcccaaataaaacatgtcctgcatggcat
+tagtttgttggggcagtgcccggatagcatcaacgctgcgctgatttgccgtggcgagaa
+aatgtcgatcgccgttatggccggcgtattagaagcgcgcggtcacaacgttactgttat
+cgatccggtcgaaaaactgctggcagtggggcattacctcgaatctaccgtcgatattgc
+tgagtccacccgccgtattgcggcaagccgcattccggctgatcacatggtgctgatggc
+aggtttcaccgccggtaatgaaaaaggcgaactggtggtgcttggacgcaacggttccga
+ctactctgctgcggtgctggctgcctgtttacgcgccgattgttgcgagatttggacgga
+cgttgacggggtctatacctgcgacccgcgtcaggtgcccgatgcgaggttgttgaagtc
+gatgtcctaccaggaagcgatggagctttcctacttcggcgctaaagttcttcacccccg
+caccattacccccatcgcccagttccagatcccttgcctgattaaaaataccggaaatcc
+tcaagcaccaggtacgctcattggtgccagccgtgatgaagacgaattaccggtcaaggg
+catttccaatctgaataacatggcaatgttcagcgtttctggtccggggatgaaagggat
+ggtcggcatggcggcgcgcgtctttgcagcgatgtcacgcgcccgtatttccgtggtgct
+gattacgcaatcatcttccgaatacagcatcagtttctgcgttccacaaagcgactgtgt
+gcgagctgaacgggcaatgcaggaagagttctacctggaactgaaagaaggcttactgga
+gccgctggcagtgacggaacggctggccattatctcggtggtaggtgatggtatgcgcac
+cttgcgtgggatctcggcgaaattctttgccgcactggcccgcgccaatatcaacattgt
+cgccattgctcagggatcttctgaacgctcaatctctgtcgtggtaaataacgatgatgc
+gaccactggcgtgcgcgttactcatcagatgctgttcaataccgatcaggttatcgaagt
+gtttgtgattggcgtcggtggcgttggcggtgcgctgctggagcaactgaagcgtcagca
+aagctggctgaagaataaacatatcgacttacgtgtctgcggtgttgccaactcgaaggc
+tctgctcaccaatgtacatggccttaatctggaaaactggcaggaagaactggcgcaagc
+caaagagccgtttaatctcgggcgcttaattcgcctcgtgaaagaatatcatctgctgaa
+cccggtcattgttgactgcacttccagccaggcagtggcggatcaatatgccgacttcct
+gcgcgaaggtttccacgttgtcacgccgaacaaaaaggccaacacctcgtcgatggatta
+ctaccatcagttgcgttatgcggcggaaaaatcgcggcgtaaattcctctatgacaccaa
+cgttggggctggattaccggttattgagaacctgcaaaatctgctcaatgcaggtgatga
+attgatgaagttctccggcattctttctggttcgctttcttatatcttcggcaagttaga
+cgaaggcatgagtttctccgaggcgaccacgctggcgcgggaaatgggttataccgaacc
+ggacccgcgagatgatctttctggtatggatgtggcgcgtaaactattgattctcgctcg
+tgaaacgggacgtgaactggagctggcggatattgaaattgaacctgtgctgcccgcaga
+gtttaacgccgagggtgatgttgccgcttttatggcgaatctgtcacaactcgacgatct
+ctttgccgcgcgcgtggcgaaggcccgtgatgaaggaaaagttttgcgctatgttggcaa
+tattgatgaagatggcgtctgccgcgtgaagattgccgaagtggatggtaatgatccgct
+gttcaaagtgaaaaatggcgaaaacgccctggccttctatagccactattatcagccgct
+gccgttggtactgcgcggatatggtgcgggcaatgacgttacagctgccggtgtctttgc
+tgatctgctacgtaccctctcatggaagttaggagtctgacatggttaaagtttatgccc
+cggcttccagtgccaatatgagcgtcgggtttgatgtgctcggggcggcggtgacacctg
+ttgatggtgcattgctcggagatgtagtcacggttgaggcggcagagacattcagtctca
+acaacctcggacgctttgccgataagctgccgtcagaaccacgggaaaatatcgtttatc
+agtgctgggagcgtttttgccaggaactgggtaagcaaattccagtggcgatgaccctgg
+aaaagaatatgccgatcggttcgggcttaggctccagtgcctgttcggtggtcgcggcgc
+tgatggcgatgaatgaacactgcggcaagccgcttaatgacactcgtttgctggctttga
+tgggcgagctggaaggccgtatctccggcagcattcattacgacaacgtggcaccgtgtt
+ttctcggtggtatgcagttgatgatcgaagaaaacgacatcatcagccagcaagtgccag
+ggtttgatgagtggctgtgggtgctggcgtatccggggattaaagtctcgacggcagaag
+ccagggctattttaccggcgcagtatcgccgcca
b
diff -r 000000000000 -r 6a88b42ce6b9 test-data/header.mira
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/header.mira Fri Nov 21 06:42:56 2014 -0500
b
@@ -0,0 +1,2 @@
+@Version 2 0
+@Program MIRALIB
b
diff -r 000000000000 -r 6a88b42ce6b9 test-data/tvc_bait.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tvc_bait.fasta Fri Nov 21 06:42:56 2014 -0500
b
@@ -0,0 +1,2 @@
+>fragment (intended to match some but not all of tvs_mini.fastq)
+TTAGCCtttcgagcggccgcccgggcaggtctgaaaaacaccgcaaaatgccggcgggtcacggtttggggtcgcgcaccagagatctatttgctcgggcattcaggaAGCCTT
b
diff -r 000000000000 -r 6a88b42ce6b9 test-data/tvc_contigs.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tvc_contigs.fasta Fri Nov 21 06:42:56 2014 -0500
b
@@ -0,0 +1,20 @@
+>mira_c1
+ttagcgtggtcgcggccgaggtaccctctaccatgaaaccaggcttgggtccctctggct
+gtctcttggtgctgataatcttaccttgtgccttggcctcagccttcaacttatcgttct
+tcttgatcctctccatgatctcctcatggcaccttgatggctggacatgttccacacgaa
+catgaatcctcttccttatgattctgtttccaacctgcttgttgacctcaacaccaacag
+cacgcttggtgacgttccatacccgacccgtgcggccatgatagaacttgtggggcatac
+ctttgtggatcgacccgttaaccttgacatcaacatagtcgccgactttgaagatacgaa
+ggtaagttgtgagatgggtaggacccttcttcctgaatgcccgagcaaatagatctctgg
+tgcgcgaccccaaaccgtgacccgccggcattttgcggtgtttttcagacctgcccgggc
+ggccgctcgaaa
+>mira_c2
+tttcgagcggycgcccggscgaggtaccctscaccatgaaaccaggcttgggtccctcwg
+gctgyctcttggtgctgataatcttwccytgtgccttkgcctcagccttcaacttatcrt
+tcttcttgatcctctccattatctcctcatggcamckagatggctggacatgttccacac
+gaacatgaatcctcttccttatgattctgtttccaacctgyttgttgacctcaacaccaa
+cagcgcgcttggtgacgttccatacccgacccgtgcggccatggtagaacttgtggggca
+tacctttgtggatcgacccgttaaccttgacatcaacatagtcgccgactttgaagatac
+gaaggtaagttgtgagatgggtaggacccttcttcctgaatgcccgagcaaatagatccc
+tggtgcgtgacctcaaaccgtgacccgccggcattttgaggtgtttttcagacctgcccg
+ggcggccgctcgaaa
b
diff -r 000000000000 -r 6a88b42ce6b9 test-data/tvc_map_ref_strain.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tvc_map_ref_strain.fasta Fri Nov 21 06:42:56 2014 -0500
b
@@ -0,0 +1,20 @@
+>mira_c1_bb
+ttagcgtggtcgcggccgaggtaccctctaccatgaaaccaggcttgggtccctctggct
+gtctcttggtgctgataatcttaccttgtgccttggcctcagccttcaacttatcgttct
+tcttgatcctctccatgatctcctcatggcaccttgatggctggacatgttccacacgaa
+catgaatcctcttccttatgattctgtttccaacctgcttgttgacctcaacaccaacag
+cacgcttggtgacgttccatacccgacccgtgcggccatgatagaacttgtggggcatac
+ctttgtggatcgacccgttaaccttgacatcaacatagtcgccgactttgaagatacgaa
+ggtaagttgtgagatgggtaggacccttcttcctgaatgcccgagcaaatagatctctgg
+tgcgcgaccccaaaccgtgacccgccggcattttgcggtgtttttcagacctgcccgggc
+ggccgctcgaaa
+>mira_c2_bb
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxaccatgaaaccaggcttgggtccctctg
+gctgtctcttggtgctgataatcttaccttgtgccttggcctcagccttcaacttatcgt
+tcttcttgatcctctccattatctcctcatggcamckagatggctggacatgttccacac
+gaacatgaatcctcttccttatgatyctgttwccaacctgyttgttgacctcaacaccaa
+cagcgcgcttggtgacgttccatacccgacccgtgcggccatggtagaacttgtggggca
+tacctttgtggatcgacccgttaaccttgacatcaacatagtcgccgactttgaagatac
+gaaggtaagttgtgagatgggtaggacccttcttcctgaatgcccgagcaaatagatcyc
+tggtgcgtgacctcaaaccgtgacccgccggcattttgaggtgtttttcagacctgcccg
+ggcggccgctcgaaa
b
diff -r 000000000000 -r 6a88b42ce6b9 test-data/tvc_map_same_strain.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tvc_map_same_strain.fasta Fri Nov 21 06:42:56 2014 -0500
b
@@ -0,0 +1,20 @@
+>mira_c1_bb
+ttagcgtggtcgcggccgaggtaccctctaccatgaaaccaggcttgggtccctctggct
+gtctcttggtgctgataatcttaccttgtgccttggcctcagccttcaacttatcgttct
+tcttgatcctctccatgatctcctcatggcaccttgatggctggacatgttccacacgaa
+catgaatcctcttccttatgattctgtttccaacctgcttgttgacctcaacaccaacag
+cacgcttggtgacgttccatacccgacccgtgcggccatgatagaacttgtggggcatac
+ctttgtggatcgacccgttaaccttgacatcaacatagtcgccgactttgaagatacgaa
+ggtaagttgtgagatgggtaggacccttcttcctgaatgcccgagcaaatagatctctgg
+tgcgcgaccccaaaccgtgacccgccggcattttgcggtgtttttcagacctgcccgggc
+ggccgctcgaaa
+>mira_c2_bb
+tttcgagcggncgcccggncgaggtaccctncaccatgaaaccaggcttgggtccctctg
+gctgtctcttggtgctgataatcttaccttgtgccttggcctcagccttcaacttatcgt
+tcttcttgatcctctccattatctcctcatggcamckagatggctggacatgttccacac
+gaacatgaatcctcttccttatgattctgtttccaacctgyttgttgacctcaacaccaa
+cagcgcgcttggtgacgttccatacccgacccgtgcggccatggtagaacttgtggggca
+tacctttgtggatcgacccgttaaccttgacatcaacatagtcgccgactttgaagatac
+gaaggtaagttgtgagatgggtaggacccttcttcctgaatgcccgagcaaatagatccc
+tggtgcgtgacctcaaaccgtgacccgccggcattttgaggtgtttttcagacctgcccg
+ggcggccgctcgaaa
b
diff -r 000000000000 -r 6a88b42ce6b9 test-data/tvc_mini.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tvc_mini.fastq Fri Nov 21 06:42:56 2014 -0500
b
b"@@ -0,0 +1,24 @@\n+@gnlti136477918\n+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTTCGAGCGGCCGCCCGGGCAGGTACCCTCCACCATGAAACCAGGCTTGGGTCCCTCAGGCTGCCTCTTGGTGCTGATAATCTTTCCCTGTGCCTTTGCCTCAGCCTTCAACTTATCATTCTTCTTGATCCTCTCCATTATCTCCTCATGGCAACGAGATGGCTGGACATGTTCCACACGAACATGAATCCTCTTCCTTATGATCCTGTTACCAACCTGTTTGTTGACCTCAACACCAACAGCGCGCTTGGTAACATTCCAGACCCGACCCGTGCGCCCATGGTAGAACTTGTGGGGCATACCTTTGTGGATCGACCCGTTAACCTTGACATCAACATAGTCGCCGACTTTGAAGATACGAAGGTAAGTTGTGAGATGGGTAGGACCCTTCTTCCTGAATGCCCGAGCAAATAGATCCCTGGTGCGTGAACTCAAACCGTGACCCGCCGGCATTTTGAGGTGTTTTTCAGCTGCCTTGTTCACXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\n++\n+38<>><><<96-++42:AABBCCCCCCCCDFFFIYYIIIIIINTTTTNNNNNNTTTTTTNNIIIIHHHHHHYIFFFIDIINIITTTOQDDDHHHNTYTFFFIIINNIITDDDDDDFLLTTTLLLYYYYYYFFIIIILKOOYYYNNNNNNOOKKKKILLLFFIOOTTNLLLLLNYYYYKYFDDDLLLNNNTTNNLMKKSYNNJIIGGGGLLIILOOYYYYYYYYYTNNNNNTYYYYYYYTOLLLLLLNTTYYTTTLLTYYKKKONNNNLLLLGGINIIIIIINNNNNNNNIHHHHHHHHHHINIITTTTNNNNNTYTNNNNIIIFFDHHHFFINNNNIHHHDDEIDDDNNDDKQQQQMMMQQYYNNIDCBBBBAHIGGGKYYYOOD?<AACCCCCHCCC@>>>>HBBAAAA>@999AOOOYIIICC<<,,,99HHHFKK??C>>B>>H?6/+))42856301:7<>HHEI4/))-10449--0..((*4))*35A<9+++44>BB754---@<;42*))45:7024.(')))')++049>>41-'(,'(.2393222/3171((((-.4011/0+).)''),..4133><B=451119411+))<44:686:/066888888=::884))*'''**,''*-.''*,/2(*144+')64>;1/,'')''1*30+0..****(*0-.4-)*),'(''+,-((*+))**+,''''''***''***-*)121,''''(+*,,+-((****.0..,0*))*(),))''))*+,*)()))''''+'')'')**)()'','')'(**((*((*(((*441.-*****())+*''')-++*****-*((((**))))))*)))++***)(**11.()****0*-,((*--.***,((,,,**'')'''')'-((--,''**441***)+'(''*,*(\n+@gnlti136478624\n+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTAGCGTGGTCGCGGCCGAGGTACCCTCTAC
CATGAAACCAGGCTTGGGTCCCTCTGGCTGTCTCTTGGTGCTGATAATCTTACCTTGTGCCTTGGCCTCAGCCTTCAACTTATCGTTCTTCTTGATCCTCTCCATGATCTCCTCATGGCACCTTGATGGCTGGACATGTTCCACACGAACATGAATCCTCTTCCTTATGATTCTGTTTCCAACCTGCTTGTTGACCTCAACACCAACAGCACGCTTGGTGACGTTCCATACCCGACCCGTGCGGCCATGATAGAACTTGTGGGGCATACCTTTGTGGATCGACCCGTTAACCTTGACATCAACATAGTCGGCGACTTTGAAGATACGAAGGTAAGTTGTGAGATGGGTAGGACCCTTCTTCCTGAATGGCCGAGCAAATAGATCTCTGGGTGCGGGAACTCAAACCGGGAACCGGCGGCATTTTGCGGTGTTTTTAGACCCTGCCGGGGGGGCGGTCGAAAGGCCGATTCTTGAGATTTTCCXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXGGGGGTAGGAGGTTGTAATTGGAAAAACCTGGGGTAGCAAGTTAATGGCTTGAGCAATTCCGTTCGGCGGGTGGGTATAGAGAAGGGGCGGGCGATCGGGATCCGAAGATGGGGAGCGGATGGGGAGGAGAGGCAGGTGGGGATATAGGGGGGGGGTGGGGTAGGGGAGGGGCGGTGCTGTAGGGGGAGGGGCGGCGTTGGTTTTCTGTGTTACGAGTTGGGTGACCCGAAGTAATTGGGG\n++\n++1449>>>;=::AADDCCCCFIICCBB>???BBDDDDYYHBCCBBFF@@777BBG@>7584;;@DHDDDDDDMNIIIIIYYYTOYKKKMIDDDDDHOKKKQSTTTNNIIYYFFFFFIDDDDDIYOOIIDAA>DADDFDLLDDDIKKKKOKKKKKKYYYOOJJJOYYYYTOOKKPMMMMMSSSSMMMSSYYYYLJIIIID=====FKKKKKKYYYOOKKIIIIISSFFDIHIIKSSTOOKKKLYYSSKMIIIOOIIIDDDDHDDDIOOIIFFFIIIIKKKMIIIIIIMKKKKIIIFDDDDADDIDDDDDDDHDDDDFFF99///<<HFFFFFFFFGOOYTDDDHHH99,,,95>>>>47//-</3-822.446777BBBFFIOC>6.++-53:?:>7744213...772007:9:-++33>>DH>>??933;;FQ<93/+10++/.//-10234:1//223;:/,,***++'')'+,/)))-.2.++((.0***,))*,0(())''))))+'')***''))***))),669+,*****..''')*,**,*))))*'',)))'(++,++((*+*)*.*))''')***''.*))'')''''''***+)))++**(''''')****)''')'(***''**+/.)))*)')((''***(('')'')-))''''.'')))**'+''''**))''))***+((***)%(((***(((((,.,,(((((*(((+.(()'''')*(())(***((**-+,,)')''*/,''''**'''))((''*+((''''))*))'')'')),.)())'''''('*)**+***-*(')''))((+++0***(('')'')**()++*+**(')).5+*'''')*,---'''')'''\n+@gnlti136478626\n+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTTCGAGCGGCCGCCCGGGCAGGTCTGAAAAACACCTCAAAATGCCGGCGGGTCACGGTTTGAGGTCACGCACCAGAGATCTATTTGCTCGGGCATTCAGGAAGAAGGGTCCTACCCATCTCACAACTTACCTTCGTATCTTCAAAGTCGGCGACTATGTTGATGTCAAGGTTAACGGGTCGATCCACAAAGGTATGCCCCACAAGTTCTACCATGGCCGCACGGGTCGGGTATGGAACGTCACCA
AGCGCGCTGTT"..b"TGGGTGTGGGGGTGGGTTTTGTTAGGCGAGGTGGTTTTTTTTTCTTTTTTTTTTTAGTGGAGGTGT\n++\n+04--46:<<B<<>@>HHEB<822<<IEHIHCCCCCCIIIITTIIIIIINNTTTTTYTIHHHHHHNNNIIDDDDFFNTKM>>?OQFFFMKOOTFDDDDHHIIIIOHFFFFFINTDDAAAADHHDDDOYNNHHFFDDDDDDFDC=AA=DIIIIFFNHHFFFFNNNNNNNNNNDD448DNTOOKKKOBB?DFGGGNOTOO555>>A>>>AAF:::>>@DB=====5AACOIIBCCBB<5005<41''+18EAAAHHHB>96-+,+14:AAIB??>>CD>>;87>5:30-14477<>@CDDD>>?==MQYI>H---88:77:<B>>=33000008<9::>BBBFHHCCC>IFDDDOOOQIQQII:2((+6<552228>DDDEH>>33399>31)''-.FFIMIIIO>>333;@II>71:37<AAEIAA778<B69,,,01BBIKFF>>>944,,,6:6/(((*44<<43,,,66AEH98,,,6/+**--..((*,1><::65/0*'))'(,-,)++*31+((*((**.,+*'')'()'''*++))''('*+26410''''+)(())''))*'(***++*))*((****(''''')++**)+'')*))*.-***))*)*-/****,-30.)''''''''***''''')-.*))'')''**++*))/,,((,+-***+)'''''')+'''.*)')'(0-+((+++)))'''(*+'''')'(**,***''''))*))'')))),''))''))*((***))/()(*''''++**((((((****(')))))*))'),))'')''.)))))))'')+,++')-))'(+))***))''))****++))))+1-**))**'(140''))**))'')+**(\n+@gnlti136479357\n+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTGTGGTCGCGGCCGAGGTACCCTGCACCATGAAACCAGGCTTGGGTCCCTCTGGCTGTCTCTTGGTGCTGATAATCTTACCTTGTGCCTTGGCCTCAGCCTTCAACTTATCGTTCTTCTTGATCCTCTCCATGATCTCCTCATGGCACCTTGATGGCTGGACATGTTCCACACGAACATGAATCCTCTTCCTTATGATTCTGTTTCCAACCTGCTTGTTGACCTCAACACCAACAGCGCGCTTGGTGACGTTCCATACCCGACCCGTGCGGCCATGGTAGAACTTGTGGGGCATACCTTTGTGGATCGACCCGTTAACCTTGACATCAACATAGTCGCCGACTTTGAAGATACGAAGGTAAGTTGTGAGATGGGTAGGACCCTTCTTCCTGAATGCCCGAGCAAATAGATCCCTGGTGCGTGGACTCCAAACGTGAACTTGCCGGGCGGGGGGGAGAGGGGGAGCGGGGGGGGGGGAGAATAAGGGGGGAGGGGAGGGGGAGAGAAAAGGGAGGAGGGGGGGGGTAGGGAGGGAGAGGGAGGGGGGAGGGGGGGGGAGGGGGGGGGGGGAGGGAAGGGGGGGGAGGGGGGGAGGGGGAGGGGAGAGGGGGAGGGGAGGGGGGGGGGAGGGGAGGGGAAGAGGGGGGGGGGGGGGGGGGGAGGGGGGGAGGGGGGGGGGGAGGGGGGGGGGGGGGAGGGAGAAGGAGAAA\n++\n+.4<BB;>>>>>>>>FDCCCCCCIINIIIDCCCCCCDDDDYQKKFNNNCAAAAAINNIIINTIIHHDDDDDDDDDDDKITTTTLYYYYYLFFIIIILOKKKIIIIKKOKYFDDDDDFIIIIIKKKLLLLTIDDDDDFFDDDDDNNIIIIIKKDDDDHHJOYYSSMMFFADDDDLYSSB>666>BDDDDKOOKJJOOJJED==99AOIJJOOYYYLJJJLLTTTTLYYYYYYYYYYLLIIBBADDNOIIIIIINDAAD
DDDKOOIIIIIFDDA>7==@@DII??887BBOOFDDDDDIYYNNNHDDKOO?BBHHINODDAF>A>AADFFIIOGFFFFIITOOIDDDDDDDDDDDHHD89,,,<>FFFDD>99<<<B<845;<BAAA;>99=EBIIIIIOOD@@><>AB<8::AA:>AABHIHHHCCC99--+46CCCIIIIAA551-4440++)))4499+))019<>>>1/()0/-('''129.,//+((**+++8@@,*)11))*+***+++))%(,.*)))..,.2+**+8..)))),*))'')))''.''+)*++)+)))''))'(++++*))'''''))****))''))/.03:=741.''**),''''**))))))4**)')'').11.('*))'%)*-.2))*.0('''))(')''))****))('+'')''))****,((((**))1..''))***)1-1-.''),,''))%(.**)(')))*)-().-.***))1)''+''))****))''\n+@gnlti136479522\n+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTTCGAGCGGCCGCCCGGGCAGGTCTGAAAAACACCGCAAAATGCCGGCGGGTCACGGTTTGGGGTCGCGCACCAGAGATCTATTTGCTCGGGCATTCAGGAAGAAGGGTCCTACCCATCTCACAACTTACCTTCGTATCTTCAAAGTCGGCGACTATGTTGATGTCAAGGTTAACGGGTCGATCCACAAAGGTATGCCCCACAAGTTCTATCATGGCCGCACGGGTCGGGTATGGAACGTCACCAAGCGTGCTGTTGGGTGTNGAGGTCAAAAGCAAGTTGGAAACAGAATCATAAGGAAGAGGATTCATGTCCGTGTGGAACATGTCCAGCCATCAAGGTGCCATGAGGAGATCATGGAGAGGATCAAGAAGAACGATTAAGTGAAGGCTGAGGGCAAGGCACAAGGTATGATTATCAGGACCAAGAGACAGGCAGAGGGGACCAAGGCCTGGTTTCATGGGTXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\n++\n+(/..2>>H@CACCHICCCCCCIIYTTTFA>>>ADIIIIOTNNNNNIKINIIHDDDIIIHOMMFNNHDDDDDFINIIKOKKKFFIIIITYTLYIIIIIINTTTIFFFFFKLLYYYYYNNNNHHDDDGGNYYYYYYFDGDDDTHIIIIIIIKFFFIIITYINIIIITTTTYYYYTTNNNNNNNIIIIIIIIIFFIOOOOOIFFFFIIINOFKKNND84**+::FFFDHDDDDDIDD>44***49IIIFCIA?94233AIIIMQOOBFF:4-***66CCCCD>>444>?B44*((***45C>@BHIAAAA94%!%44=1-''''))''+(+/,((*245411.40)((+4::79..***-+/.()14BEEIIBCFIIHD88,,,NBID>>A>BB?AAAA>H:::;::4-+,,4/;46,**4841))/1.''*)))+444+++520'')11)(*.+,0**0((*159501224594406652//-/-2,/*1*')+.()./1.01::>>>>5511.4***1:5*((,-/-((******+*-'')((/20-)-,-*++.1/.(())''),351''))'(..280.'')+()**,398..''))**((+1.(())''))**-.--,,**''*)((-)***(()),1,/.1,))))+,+*+*++,-,'''')**((+*++,))))''))**1.,,***+****,+**+++4***.*))'')'')))''))
*.5811.--+,+*+*))+,,-..+\n"
b
diff -r 000000000000 -r 6a88b42ce6b9 test-data/tvc_mini_bait_neg.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tvc_mini_bait_neg.fastq Fri Nov 21 06:42:56 2014 -0500
b
@@ -0,0 +1,16 @@
+@gnlti136477918
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTTCGAGCGGCCGCCCGGGCAGGTACCCTCCACCATGAAACCAGGCTTGGGTCCCTCAGGCTGCCTCTTGGTGCTGATAATCTTTCCCTGTGCCTTTGCCTCAGCCTTCAACTTATCATTCTTCTTGATCCTCTCCATTATCTCCTCATGGCAACGAGATGGCTGGACATGTTCCACACGAACATGAATCCTCTTCCTTATGATCCTGTTACCAACCTGTTTGTTGACCTCAACACCAACAGCGCGCTTGGTAACATTCCAGACCCGACCCGTGCGCCCATGGTAGAACTTGTGGGGCATACCTTTGTGGATCGACCCGTTAACCTTGACATCAACATAGTCGCCGACTTTGAAGATACGAAGGTAAGTTGTGAGATGGGTAGGACCCTTCTTCCTGAATGCCCGAGCAAATAGATCCCTGGTGCGTGAACTCAAACCGTGACCCGCCGGCATTTTGAGGTGTTTTTCAGCTGCCTTGTTCACXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
++
+38<>><><<96-++42:AABBCCCCCCCCDFFFIYYIIIIIINTTTTNNNNNNTTTTTTNNIIIIHHHHHHYIFFFIDIINIITTTOQDDDHHHNTYTFFFIIINNIITDDDDDDFLLTTTLLLYYYYYYFFIIIILKOOYYYNNNNNNOOKKKKILLLFFIOOTTNLLLLLNYYYYKYFDDDLLLNNNTTNNLMKKSYNNJIIGGGGLLIILOOYYYYYYYYYTNNNNNTYYYYYYYTOLLLLLLNTTYYTTTLLTYYKKKONNNNLLLLGGINIIIIIINNNNNNNNIHHHHHHHHHHINIITTTTNNNNNTYTNNNNIIIFFDHHHFFINNNNIHHHDDEIDDDNNDDKQQQQMMMQQYYNNIDCBBBBAHIGGGKYYYOOD?<AACCCCCHCCC@>>>>HBBAAAA>@999AOOOYIIICC<<,,,99HHHFKK??C>>B>>H?6/+))42856301:7<>HHEI4/))-10449--0..((*4))*35A<9+++44>BB754---@<;42*))45:7024.(')))')++049>>41-'(,'(.2393222/3171((((-.4011/0+).)''),..4133><B=451119411+))<44:686:/066888888=::884))*'''**,''*-.''*,/2(*144+')64>;1/,'')''1*30+0..****(*0-.4-)*),'(''+,-((*+))**+,''''''***''***-*)121,''''(+*,,+-((****.0..,0*))*(),))''))*+,*)()))''''+'')'')**)()'','')'(**((*((*(((*441.-*****())+*''')-++*****-*((((**))))))*)))++***)(**11.()****0*-,((*--.***,((,,,**'')'''')'-((--,''**441***)+'(''*,*(
+@gnlti136478624
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTAGCGTGGTCGCGGCCGAGGTACCCTCTACCATGAAACCAGGCTTGGGTCCCTCTGGCTGTCTCTTGGTGCTGATAATCTTACCTTGTGCCTTGGCCTCAGCCTTCAACTTATCGTTCTTCTTGATCCTCTCCATGATCTCCTCATGGCACCTTGATGGCTGGACATGTTCCACACGAACATGAATCCTCTTCCTTATGATTCTGTTTCCAACCTGCTTGTTGACCTCAACACCAACAGCACGCTTGGTGACGTTCCATACCCGACCCGTGCGGCCATGATAGAACTTGTGGGGCATACCTTTGTGGATCGACCCGTTAACCTTGACATCAACATAGTCGGCGACTTTGAAGATACGAAGGTAAGTTGTGAGATGGGTAGGACCCTTCTTCCTGAATGGCCGAGCAAATAGATCTCTGGGTGCGGGAACTCAAACCGGGAACCGGCGGCATTTTGCGGTGTTTTTAGACCCTGCCGGGGGGGCGGTCGAAAGGCCGATTCTTGAGATTTTCCXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXGGGGGTAGGAGGTTGTAATTGGAAAAACCTGGGGTAGCAAGTTAATGGCTTGAGCAATTCCGTTCGGCGGGTGGGTATAGAGAAGGGGCGGGCGATCGGGATCCGAAGATGGGGAGCGGATGGGGAGGAGAGGCAGGTGGGGATATAGGGGGGGGGTGGGGTAGGGGAGGGGCGGTGCTGTAGGGGGAGGGGCGGCGTTGGTTTTCTGTGTTACGAGTTGGGTGACCCGAAGTAATTGGGG
++
++1449>>>;=::AADDCCCCFIICCBB>???BBDDDDYYHBCCBBFF@@777BBG@>7584;;@DHDDDDDDMNIIIIIYYYTOYKKKMIDDDDDHOKKKQSTTTNNIIYYFFFFFIDDDDDIYOOIIDAA>DADDFDLLDDDIKKKKOKKKKKKYYYOOJJJOYYYYTOOKKPMMMMMSSSSMMMSSYYYYLJIIIID=====FKKKKKKYYYOOKKIIIIISSFFDIHIIKSSTOOKKKLYYSSKMIIIOOIIIDDDDHDDDIOOIIFFFIIIIKKKMIIIIIIMKKKKIIIFDDDDADDIDDDDDDDHDDDDFFF99///<<HFFFFFFFFGOOYTDDDHHH99,,,95>>>>47//-</3-822.446777BBBFFIOC>6.++-53:?:>7744213...772007:9:-++33>>DH>>??933;;FQ<93/+10++/.//-10234:1//223;:/,,***++'')'+,/)))-.2.++((.0***,))*,0(())''))))+'')***''))***))),669+,*****..''')*,**,*))))*'',)))'(++,++((*+*)*.*))''')***''.*))'')''''''***+)))++**(''''')****)''')'(***''**+/.)))*)')((''***(('')'')-))''''.'')))**'+''''**))''))***+((***)%(((***(((((,.,,(((((*(((+.(()'''')*(())(***((**-+,,)')''*/,''''**'''))((''*+((''''))*))'')'')),.)())'''''('*)**+***-*(')''))((+++0***(('')'')**()++*+**(')).5+*'''')*,---'''')'''
+@gnlti136479063
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTAGCGTGGTCGCGGCCGAGGTACCCTCTACCATGAAACCAGGCTTGGGTCCCTCTGGCTGTCTCTTGGTGCTGATAATCTTACCTTGTGCCTTGGCCTCAGCCTTCAACTTATCGTTCTTCTTGATCCTCTCCATGATCTCCTCATGGCACCTTGATGGCTGGACATGTTCCACACGAACATGAATCCTCTTCCTTATGATTCTGTTTCCAACCTGCTTGTTGACCTCAACACCAACAGCGCGCTTGGTGACGTTCCATACCCGACCCGTGCGCCCATGGTAGAACTTGTGGGGCATACCTTTGTGGATCGACCCGTTAACCTTGACATCAACATAGTCGGCGACTTTGAAGATACGAAGGTAAGTTGTGAGATGGGTAGGACCCTTCTTCCTGAATGGCCGAGCAAATAGATCCCTGGTGCGTGAGCTCAAAGCGTGGACCGGCGGCATTTTAGGGTGTTTTTCAGCTGGCTCGGTGGTTTGAATGTGACTTGGGCGGGGGGGGGGTCGAAAGGCGAATTTGGAGATTTTCATAAAATTGGGGCGGTTGAAGATTGATTTTAAGGGGCAATTTGGGCTATAGGGGTGGTTTTAAATTAATGGGGGTGGTTTAAAAGTGTGATGGGGAAACGTGGGTTACCAATTTATGGGTGTGTGGAGTTCCCTTTGTGAGGTGGTATAGGAAAGGGGGGGCGTGACCTGCCACGTGGGGGGGGAAGTGTATGGGGGCGGGTTGGGGGGTTGAGGGGGGTGTGGGTGTGGGGGTGGGTTTTGTTAGGCGAGGTGGTTTTTTTTTCTTTTTTTTTTTAGTGGAGGTGT
++
+04--46:<<B<<>@>HHEB<822<<IEHIHCCCCCCIIIITTIIIIIINNTTTTTYTIHHHHHHNNNIIDDDDFFNTKM>>?OQFFFMKOOTFDDDDHHIIIIOHFFFFFINTDDAAAADHHDDDOYNNHHFFDDDDDDFDC=AA=DIIIIFFNHHFFFFNNNNNNNNNNDD448DNTOOKKKOBB?DFGGGNOTOO555>>A>>>AAF:::>>@DB=====5AACOIIBCCBB<5005<41''+18EAAAHHHB>96-+,+14:AAIB??>>CD>>;87>5:30-14477<>@CDDD>>?==MQYI>H---88:77:<B>>=33000008<9::>BBBFHHCCC>IFDDDOOOQIQQII:2((+6<552228>DDDEH>>33399>31)''-.FFIMIIIO>>333;@II>71:37<AAEIAA778<B69,,,01BBIKFF>>>944,,,6:6/(((*44<<43,,,66AEH98,,,6/+**--..((*,1><::65/0*'))'(,-,)++*31+((*((**.,+*'')'()'''*++))''('*+26410''''+)(())''))*'(***++*))*((****(''''')++**)+'')*))*.-***))*)*-/****,-30.)''''''''***''''')-.*))'')''**++*))/,,((,+-***+)'''''')+'''.*)')'(0-+((+++)))'''(*+'''')'(**,***''''))*))'')))),''))''))*((***))/()(*''''++**((((((****(')))))*))'),))'')''.)))))))'')+,++')-))'(+))***))''))****++))))+1-**))**'(140''))**))'')+**(
+@gnlti136479357
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTGTGGTCGCGGCCGAGGTACCCTGCACCATGAAACCAGGCTTGGGTCCCTCTGGCTGTCTCTTGGTGCTGATAATCTTACCTTGTGCCTTGGCCTCAGCCTTCAACTTATCGTTCTTCTTGATCCTCTCCATGATCTCCTCATGGCACCTTGATGGCTGGACATGTTCCACACGAACATGAATCCTCTTCCTTATGATTCTGTTTCCAACCTGCTTGTTGACCTCAACACCAACAGCGCGCTTGGTGACGTTCCATACCCGACCCGTGCGGCCATGGTAGAACTTGTGGGGCATACCTTTGTGGATCGACCCGTTAACCTTGACATCAACATAGTCGCCGACTTTGAAGATACGAAGGTAAGTTGTGAGATGGGTAGGACCCTTCTTCCTGAATGCCCGAGCAAATAGATCCCTGGTGCGTGGACTCCAAACGTGAACTTGCCGGGCGGGGGGGAGAGGGGGAGCGGGGGGGGGGGAGAATAAGGGGGGAGGGGAGGGGGAGAGAAAAGGGAGGAGGGGGGGGGTAGGGAGGGAGAGGGAGGGGGGAGGGGGGGGGAGGGGGGGGGGGGAGGGAAGGGGGGGGAGGGGGGGAGGGGGAGGGGAGAGGGGGAGGGGAGGGGGGGGGGAGGGGAGGGGAAGAGGGGGGGGGGGGGGGGGGGAGGGGGGGAGGGGGGGGGGGAGGGGGGGGGGGGGGAGGGAGAAGGAGAAA
++
+.4<BB;>>>>>>>>FDCCCCCCIINIIIDCCCCCCDDDDYQKKFNNNCAAAAAINNIIINTIIHHDDDDDDDDDDDKITTTTLYYYYYLFFIIIILOKKKIIIIKKOKYFDDDDDFIIIIIKKKLLLLTIDDDDDFFDDDDDNNIIIIIKKDDDDHHJOYYSSMMFFADDDDLYSSB>666>BDDDDKOOKJJOOJJED==99AOIJJOOYYYLJJJLLTTTTLYYYYYYYYYYLLIIBBADDNOIIIIIINDAADDDDKOOIIIIIFDDA>7==@@DII??887BBOOFDDDDDIYYNNNHDDKOO?BBHHINODDAF>A>AADFFIIOGFFFFIITOOIDDDDDDDDDDDHHD89,,,<>FFFDD>99<<<B<845;<BAAA;>99=EBIIIIIOOD@@><>AB<8::AA:>AABHIHHHCCC99--+46CCCIIIIAA551-4440++)))4499+))019<>>>1/()0/-('''129.,//+((**+++8@@,*)11))*+***+++))%(,.*)))..,.2+**+8..)))),*))'')))''.''+)*++)+)))''))'(++++*))'''''))****))''))/.03:=741.''**),''''**))))))4**)')'').11.('*))'%)*-.2))*.0('''))(')''))****))('+'')''))****,((((**))1..''))***)1-1-.''),,''))%(.**)(')))*)-().-.***))1)''+''))****))''
b
diff -r 000000000000 -r 6a88b42ce6b9 test-data/tvc_mini_bait_pos.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tvc_mini_bait_pos.fastq Fri Nov 21 06:42:56 2014 -0500
b
@@ -0,0 +1,8 @@
+@gnlti136478626
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTTCGAGCGGCCGCCCGGGCAGGTCTGAAAAACACCTCAAAATGCCGGCGGGTCACGGTTTGAGGTCACGCACCAGAGATCTATTTGCTCGGGCATTCAGGAAGAAGGGTCCTACCCATCTCACAACTTACCTTCGTATCTTCAAAGTCGGCGACTATGTTGATGTCAAGGTTAACGGGTCGATCCACAAAGGTATGCCCCACAAGTTCTACCATGGCCGCACGGGTCGGGTATGGAACGTCACCAAGCGCGCTGTTGGTGTTGAGGTCAAGCATGAGGTGGGAAACAGAATCATAAGGAAAAGGATTCATGTGCGTGTGGAAGATGGTCAGGGATCGAGGTGCCATGAGGAGATCATGGTGAGGATCGAGAAGAACGATTAAGGGGAAGGCTGAGGGCGAGGGACAAGGTAAGATTATCAGCGACAAGAGACAAGCGGAGGGAGCCACGGCTGGGTGTGTGGTAGAGGGTGCCTCGGGCCGGGGCAAGGCTAAGCCGAATGCTGGGGATATTCATTAGACTGGGGGGCGGTCGAGGGTGGGTCGTAATGGGCCATTTGCGCGTATGGTGGGGTTGTTTGACATTGCGCTGGCCTCGGTTTACAGGGTTGTGATTGGAAAGCCGTGCGGTTGCCAACGTTAGTGTTTGGGAGACGTTCGCGTTCGGGGGCTGGGGTATTAAGGGGGGTCTGGGGTAAGGGCGTGCGAGGATGGTGGAGGGGTTTGGGGTTGGGCGTCTGTTTCGGGGTTTGTGGCGGGGGGTTGGTGGTTGCGTGACGGTGGGGGGGTTGGGCAGGCTATGGCGGGGCGTGGTGTTGGTCTGGTTGTGAGGATGTGAGTGTGCGTTGTTGTGTATTGGGACAGGT
++
+))..28:>C>CDDDDDDCCCCDDD>>A990028>HFFFIIFDDDDDHOTYYNGFAAAA;>??BQQIDDDIDIIIIGMMDDDDDDNIIIGGFFFFIMYKKIKKDD>D>>>C>D>><<<>::..'')46>IIIQIYYYMFDDAADKKKKYYYYYYNNDDDDIIGGKK777MMFFFKDDFAADDDDFKKKKFFFKIDDDDDDKIIIIEMFF=@@@B@BB??>O???OOTTTTLLKKK???DDDD>AAAA>B994B122:=B44/--447<155>>IIFFIKKKGGGGIIN944499C>>>>>>9</--7/00?;33/5/''''))**,.,,,2/0/20004449,,,-,6,--2:G>D>D74-++.15;911**+/-''))****-,''))1.2-.*****-<>71+**()+19:46.--+-*1611+*((****'''''(-/411-1***.((+***('**-8211,-**'''')+,,4,))''))))'')),,(')))).5++))'')).1-+,,.-+(''(++,,,('''))*''''))')+).''))*)-('')+)*((++.+++-*))('))''))+-0./,,))''))'')''*'')))''****.+*''*))'')'')'')))**+++))'''))'''*''*((****'''')'(,(''''''')*''))''))++*(((*((-))'')-)**()******042))***((*))''))*,-.((*)'')%%%)+++****((***-+*)''''))))''''))''''''')))),))***+('))))+.,)()+**''+.-)))(')''))'(***,(((,,***((((**++'')'(*))'''(**'''******((****//--))0+)''))*****))'')%%'')('*)))-(*01**))'((
+@gnlti136479522
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTTCGAGCGGCCGCCCGGGCAGGTCTGAAAAACACCGCAAAATGCCGGCGGGTCACGGTTTGGGGTCGCGCACCAGAGATCTATTTGCTCGGGCATTCAGGAAGAAGGGTCCTACCCATCTCACAACTTACCTTCGTATCTTCAAAGTCGGCGACTATGTTGATGTCAAGGTTAACGGGTCGATCCACAAAGGTATGCCCCACAAGTTCTATCATGGCCGCACGGGTCGGGTATGGAACGTCACCAAGCGTGCTGTTGGGTGTNGAGGTCAAAAGCAAGTTGGAAACAGAATCATAAGGAAGAGGATTCATGTCCGTGTGGAACATGTCCAGCCATCAAGGTGCCATGAGGAGATCATGGAGAGGATCAAGAAGAACGATTAAGTGAAGGCTGAGGGCAAGGCACAAGGTATGATTATCAGGACCAAGAGACAGGCAGAGGGGACCAAGGCCTGGTTTCATGGGTXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
++
+(/..2>>H@CACCHICCCCCCIIYTTTFA>>>ADIIIIOTNNNNNIKINIIHDDDIIIHOMMFNNHDDDDDFINIIKOKKKFFIIIITYTLYIIIIIINTTTIFFFFFKLLYYYYYNNNNHHDDDGGNYYYYYYFDGDDDTHIIIIIIIKFFFIIITYINIIIITTTTYYYYTTNNNNNNNIIIIIIIIIFFIOOOOOIFFFFIIINOFKKNND84**+::FFFDHDDDDDIDD>44***49IIIFCIA?94233AIIIMQOOBFF:4-***66CCCCD>>444>?B44*((***45C>@BHIAAAA94%!%44=1-''''))''+(+/,((*245411.40)((+4::79..***-+/.()14BEEIIBCFIIHD88,,,NBID>>A>BB?AAAA>H:::;::4-+,,4/;46,**4841))/1.''*)))+444+++520'')11)(*.+,0**0((*159501224594406652//-/-2,/*1*')+.()./1.01::>>>>5511.4***1:5*((,-/-((******+*-'')((/20-)-,-*++.1/.(())''),351''))'(..280.'')+()**,398..''))**((+1.(())''))**-.--,,**''*)((-)***(()),1,/.1,))))+,+*+*++,-,'''')**((+*++,))))''))**1.,,***+****,+**+++4***.*))'')'')))''))*.5811.--+,+*+*))+,,-..+
b
diff -r 000000000000 -r 6a88b42ce6b9 test-data/tvc_mini_bait_strict.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tvc_mini_bait_strict.fastq Fri Nov 21 06:42:56 2014 -0500
b
@@ -0,0 +1,4 @@
+@gnlti136479522
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTTCGAGCGGCCGCCCGGGCAGGTCTGAAAAACACCGCAAAATGCCGGCGGGTCACGGTTTGGGGTCGCGCACCAGAGATCTATTTGCTCGGGCATTCAGGAAGAAGGGTCCTACCCATCTCACAACTTACCTTCGTATCTTCAAAGTCGGCGACTATGTTGATGTCAAGGTTAACGGGTCGATCCACAAAGGTATGCCCCACAAGTTCTATCATGGCCGCACGGGTCGGGTATGGAACGTCACCAAGCGTGCTGTTGGGTGTNGAGGTCAAAAGCAAGTTGGAAACAGAATCATAAGGAAGAGGATTCATGTCCGTGTGGAACATGTCCAGCCATCAAGGTGCCATGAGGAGATCATGGAGAGGATCAAGAAGAACGATTAAGTGAAGGCTGAGGGCAAGGCACAAGGTATGATTATCAGGACCAAGAGACAGGCAGAGGGGACCAAGGCCTGGTTTCATGGGTXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
++
+(/..2>>H@CACCHICCCCCCIIYTTTFA>>>ADIIIIOTNNNNNIKINIIHDDDIIIHOMMFNNHDDDDDFINIIKOKKKFFIIIITYTLYIIIIIINTTTIFFFFFKLLYYYYYNNNNHHDDDGGNYYYYYYFDGDDDTHIIIIIIIKFFFIIITYINIIIITTTTYYYYTTNNNNNNNIIIIIIIIIFFIOOOOOIFFFFIIINOFKKNND84**+::FFFDHDDDDDIDD>44***49IIIFCIA?94233AIIIMQOOBFF:4-***66CCCCD>>444>?B44*((***45C>@BHIAAAA94%!%44=1-''''))''+(+/,((*245411.40)((+4::79..***-+/.()14BEEIIBCFIIHD88,,,NBID>>A>BB?AAAA>H:::;::4-+,,4/;46,**4841))/1.''*)))+444+++520'')11)(*.+,0**0((*159501224594406652//-/-2,/*1*')+.()./1.01::>>>>5511.4***1:5*((,-/-((******+*-'')((/20-)-,-*++.1/.(())''),351''))'(..280.'')+()**,398..''))**((+1.(())''))**-.--,,**''*)((-)***(()),1,/.1,))))+,+*+*++,-,'''')**((+*++,))))''))**1.,,***+****,+**+++4***.*))'')'')))''))*.5811.--+,+*+*))+,,-..+
b
diff -r 000000000000 -r 6a88b42ce6b9 tools/mira4/README.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/mira4/README.rst Fri Nov 21 06:42:56 2014 -0500
b
@@ -0,0 +1,165 @@
+Galaxy wrapper for the MIRA assembly program (v4.0)
+===================================================
+
+This tool is copyright 2011-2014 by Peter Cock, The James Hutton Institute
+(formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved.
+See the licence text below (MIT licence).
+
+This tool is a short Python script (to collect the MIRA output and move it
+to where Galaxy expects the files) and associated Galaxy wrapper XML file.
+
+It is available from the Galaxy Tool Shed at:
+http://toolshed.g2.bx.psu.edu/view/peterjc/mira4_assembler 
+
+It uses a Galaxy datatype definition 'mira' for the MIRA Assembly Format,
+http://toolshed.g2.bx.psu.edu/view/peterjc/mira_datatypes
+
+A separate wrapper for MIRA v3.4 is available from the Galaxy Tool Shed at:
+http://toolshed.g2.bx.psu.edu/view/peterjc/mira_assembler
+
+Automated Installation
+======================
+
+This should be straightforward. Via the Tool Shed, Galaxy should automatically
+install the 'mira' datatype, samtools, and download and install the precompiled
+binary for MIRA v4.0.2 for the Galaxy wrapper, and run any tests.
+
+For MIRA 4, the Galaxy wrapper has been split in two, allowing separate
+cluster settings for de novo usage (high RAM) and mapping (lower RAM).
+Consult the Galaxy administration documentation for your cluster setup.
+
+WARNING: For larger tasks, be aware that MIRA can require vast amounts
+of RAM and run-times of over a week are possible. This tool wrapper makes
+no attempt to spot and reject such large jobs.
+
+
+Manual Installation
+===================
+
+First install the 'mira' datatype for Galaxy, available here:
+
+* http://toolshed.g2.bx.psu.edu/view/peterjc/mira_datatypes 
+
+There are eight Galaxy files to install:
+
+* ``mira4_de_novo.xml`` (the Galaxy tool definition for de novo usage)
+* ``mira4_mapping.xml`` (the Galaxy tool definition for mapping usage)
+* ``mira4_convert.xml`` (the Galaxy tool definition for converting MIRA files)
+* ``mira4_bait.xml`` (the Galaxy tool definition for mirabait)
+* ``mira4.py`` (the Python wrapper script)
+* ``mira4_convert.py`` (the Python wrapper script for miraconvert)
+* ``mira4_bait.py`` (the Python wrapper script for mirabait)
+* ``mira4_validator.py`` (the XML parameter validation script)
+
+The suggested location is a new ``tools/mira4`` folder. You will also need to
+modify the ``tool_conf.xml`` file to tell Galaxy to offer the tool, and also do
+this to ``tool_conf.xml.sample`` in order to run the tests::
+
+  <tool file="mira4/mira4_de_novo.xml" />
+  <tool file="mira4/mira4_mapping.xml" />
+
+You will also need to install MIRA (we used version 4.0.2) and define the
+environment variable ``$MIRA4`` pointing at the folder containing the binaries.
+See:
+
+* http://chevreux.org/projects_mira.html
+* http://sourceforge.net/projects/mira-assembler/
+
+You may wish to use different cluster setups for the de novo and mapping
+tools, see above.
+
+You will also need to install samtools (for generating a BAM file from MIRA's
+SAM output).
+
+After copying (or symlinking) the ``test-data`` files under Galaxy's ``test-data``
+folder, you can run the tests with::
+
+    $ ./run_functional_tests.sh -id mira_4_0_bait
+    $ ./run_functional_tests.sh -id mira_4_0_de_novo
+    $ ./run_functional_tests.sh -id mira_4_0_mapping
+    $ ./run_functional_tests.sh -id mira_4_0_convert
+
+
+History
+=======
+
+======= ======================================================================
+Version Changes
+------- ----------------------------------------------------------------------
+v0.0.1  - Initial version (prototype for MIRA 4.0 RC4, based on wrapper for v3.4)
+v0.0.2  - Include BAM output (using ``miraconvert`` and ``samtools``).
+        - Updated to target MIRA 4.0.1
+        - Simplified XML to apply input format to output data.
+        - Sets temporary folder at run time to respect environment variables
+          (``$TMPDIR``, ``$TEMP``, or ``$TMP`` in that order). This was
+          previously hard coded as ``/tmp``.
+v0.0.3  - Updated to target MIRA 4.0.2
+v0.0.4  - Using optparse for the Python wrapper script API
+        - Made MAF and BAM outputs optional
+        - Include wrapper for ``miraconvert``
+======= ======================================================================
+
+
+Developers
+==========
+
+Development is on a dedicated GitHub repository:
+https://github.com/peterjc/pico_galaxy/tree/master/tools/mira4
+
+For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball use
+the following command from the Galaxy root folder::
+
+    $ tar -czf mira4_wrapper.tar.gz tools/mira4/README.rst tools/mira4/mira4_de_novo.xml tools/mira4/mira4_mapping.xml tools/mira4/mira4_bait.xml tools/mira4/mira4_convert.xml tools/mira4/mira4.py tools/mira4/mira4_make_bam.py tools/mira4/mira4_validator.py tools/mira4/mira4_convert.py tools/mira4/mira4_bait.py tools/mira4/tool_dependencies.xml tools/mira4/repository_dependencies.xml test-data/U13small_m.fastq test-data/U13small_m.mira4_de_novo.fasta test-data/tvc_mini.fastq test-data/tvc_contigs.fasta test-data/tvc_map_ref_strain.fasta test-data/tvc_map_same_strain.fasta test-data/tvc_bait.fasta test-data/tvc_mini_bait_pos.fastq test-data/tvc_mini_bait_strict.fastq test-data/tvc_mini_bait_neg.fastq test-data/ecoli.fastq test-data/ecoli.mira4_de_novo.fasta test-data/header.mira test-data/empty_file.dat
+
+Check this worked::
+
+    $ tar -tzf mira4_wrapper.tar.gz
+    tools/mira4/README.rst
+    tools/mira4/mira4_de_novo.xml
+    tools/mira4/mira4_mapping.xml
+    tools/mira4/mira4_bait.xml
+    tools/mira4/mira4_convert.xml
+    tools/mira4/mira4.py
+    tools/mira4/mira4_make_bam.py
+    tools/mira4/mira4_validator.py
+    tools/mira4/mira4_convert.py
+    tools/mira4/mira4_bait.py
+    tools/mira4/tool_dependencies.xml
+    tools/mira4/repository_dependencies.xml
+    test-data/U13small_m.fastq
+    test-data/U13small_m.mira4_de_novo.fasta
+    test-data/tvc_mini.fastq
+    test-data/tvc_contigs.fasta
+    test-data/tvc_map_ref_strain.fasta
+    test-data/tvc_map_same_strain.fasta
+    test-data/tvc_bait.fasta
+    test-data/tvc_mini_bait_pos.fastq
+    test-data/tvc_mini_bait_strict.fastq
+    test-data/tvc_mini_bait_neg.fastq
+    test-data/ecoli.fastq
+    test-data/ecoli.mira4_de_novo.fasta
+    test-data/header.mira
+    test-data/empty_file.dat
+
+
+
+Licence (MIT)
+=============
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
b
diff -r 000000000000 -r 6a88b42ce6b9 tools/mira4/mira4.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/mira4/mira4.py Fri Nov 21 06:42:56 2014 -0500
[
b'@@ -0,0 +1,313 @@\n+#!/usr/bin/env python\n+"""A simple wrapper script to call MIRA and collect its output.\n+"""\n+import os\n+import sys\n+import subprocess\n+import shutil\n+import time\n+import tempfile\n+from optparse import OptionParser\n+\n+#Do we need any PYTHONPATH magic?\n+from mira4_make_bam import make_bam\n+\n+WRAPPER_VER = "0.0.4" #Keep in sync with the XML file\n+\n+def stop_err(msg, err=1):\n+    sys.stderr.write(msg+"\\n")\n+    sys.exit(err)\n+\n+\n+def get_version(mira_binary):\n+    """Run MIRA to find its version number"""\n+    # At the commend line I would use: mira -v | head -n 1\n+    # however there is some pipe error when doing that here.\n+    cmd = [mira_binary, "-v"]\n+    try:\n+        child = subprocess.Popen(cmd,\n+                                 stdout=subprocess.PIPE,\n+                                 stderr=subprocess.STDOUT)\n+    except Exception, err:\n+        sys.stderr.write("Error invoking command:\\n%s\\n\\n%s\\n" % (" ".join(cmd), err))\n+        sys.exit(1)\n+    ver, tmp = child.communicate()\n+    del child\n+    return ver.split("\\n", 1)[0].strip()\n+\n+#Parse Command Line\n+usage = """Galaxy MIRA4 wrapper script v%s - use as follows:\n+\n+$ python mira4.py ...\n+\n+This will run the MIRA binary and collect its output files as directed.\n+""" % WRAPPER_VER\n+parser = OptionParser(usage=usage)\n+parser.add_option("-m", "--manifest", dest="manifest",\n+                  default=None, metavar="FILE",\n+                  help="MIRA manifest filename")\n+parser.add_option("--maf", dest="maf",\n+                  default="-", metavar="FILE",\n+                  help="MIRA MAF output filename")\n+parser.add_option("--bam", dest="bam",\n+                  default="-", metavar="FILE",\n+                  help="Unpadded BAM output filename")\n+parser.add_option("--fasta", dest="fasta",\n+                  default="-", metavar="FILE",\n+                  help="Unpadded FASTA output filename")\n+parser.add_option("--log", 
dest="log",\n+                  default="-", metavar="FILE",\n+                  help="MIRA logging output filename")\n+parser.add_option("-v", "--version", dest="version",\n+                  default=False, action="store_true",\n+                  help="Show version and quit")\n+options, args = parser.parse_args()\n+manifest = options.manifest\n+out_maf = options.maf\n+out_bam = options.bam\n+out_fasta = options.fasta\n+out_log = options.log\n+\n+try:\n+    mira_path = os.environ["MIRA4"]\n+except KeyError:\n+    stop_err("Environment variable $MIRA4 not set")\n+mira_binary = os.path.join(mira_path, "mira")\n+if not os.path.isfile(mira_binary):\n+    stop_err("Missing mira under $MIRA4, %r\\nFolder contained: %s"\n+             % (mira_binary, ", ".join(os.listdir(mira_path))))\n+mira_convert = os.path.join(mira_path, "miraconvert")\n+if not os.path.isfile(mira_convert):\n+    stop_err("Missing miraconvert under $MIRA4, %r\\nFolder contained: %s"\n+             % (mira_convert, ", ".join(os.listdir(mira_path))))\n+\n+mira_ver = get_version(mira_binary)\n+if not mira_ver.strip().startswith("4.0"):\n+    stop_err("This wrapper is for MIRA V4.0, not:\\n%s\\n%s" % (mira_ver, mira_binary))\n+mira_convert_ver = get_version(mira_convert)\n+if not mira_convert_ver.strip().startswith("4.0"):\n+    stop_err("This wrapper is for MIRA V4.0, not:\\n%s\\n%s" % (mira_ver, mira_convert))\n+if options.version:\n+    print "%s, MIRA wrapper version %s" % (mira_ver, WRAPPER_VER)\n+    if mira_ver != mira_convert_ver:\n+        print "WARNING: miraconvert %s" % mira_convert_ver\n+    sys.exit(0)\n+\n+if not manifest:\n+    stop_err("Manifest is required")\n+elif not os.path.isfile(manifest):\n+    stop_err("Missing input MIRA manifest file: %r" % manifest)\n+\n+\n+try:\n+    threads = int(os.environ.get("GALAXY_SLOTS", "1"))\n+except ValueError:\n+    threads = 1\n+assert 1 <= threads, threads\n+\n+\n+def override_temp(manifest):\n+    """Override ``-DI:trt=/tmp`` in manifest with 
environment variable.\n+\n+    Currently MIRA 4 does not allow envronment variables like ``$TMP``\n+    inside the manifest, which'..b't_maf, ref_fasta, out_bam, handle)\n+        else:\n+            #Not collecting the MAF file, use original location        \n+            msg = make_bam(mira_convert, old_maf, ref_fasta, out_bam, handle)\n+        if msg:\n+            stop_err(msg)\n+\n+def clean_up(temp, name):\n+    folder = "%s/%s_assembly" % (temp, name)\n+    if os.path.isdir(folder):\n+        shutil.rmtree(folder)\n+\n+#TODO - Run MIRA in /tmp or a configurable directory?\n+#Currently Galaxy puts us somewhere safe like:\n+#/opt/galaxy-dist/database/job_working_directory/846/\n+temp = "."\n+\n+name = "MIRA"\n+\n+override_temp(manifest)\n+\n+start_time = time.time()\n+cmd_list = [mira_binary, "-t", str(threads), manifest]\n+cmd = " ".join(cmd_list)\n+\n+assert os.path.isdir(temp)\n+d = "%s_assembly" % name\n+#This can fail on my development machine if stale folders exist\n+#under Galaxy\'s .../database/job_working_directory/ tree:\n+assert not os.path.isdir(d), "Path %r already exists:\\n%s" % (d, os.path.abspath(d))\n+try:\n+    #Check path access\n+    os.mkdir(d)\n+except Exception, err:\n+    log_manifest(manifest)\n+    sys.stderr.write("Error making directory %s\\n%s" % (d, err))\n+    sys.exit(1)\n+\n+#print os.path.abspath(".")\n+#print cmd\n+\n+if out_log and out_log != "-":\n+    handle = open(out_log, "w")\n+else:\n+    handle = open(os.devnull, "w")\n+handle.write("======================== MIRA manifest (instructions) ========================\\n")\n+m = open(manifest, "rU")\n+for line in m:\n+    handle.write(line)\n+m.close()\n+del m\n+handle.write("\\n")\n+handle.write("============================ Starting MIRA now ===============================\\n")\n+handle.flush()\n+try:\n+    #Run MIRA\n+    child = subprocess.Popen(cmd_list,\n+                             stdout=handle,\n+                             stderr=subprocess.STDOUT)\n+except 
Exception, err:\n+    log_manifest(manifest)\n+    sys.stderr.write("Error invoking command:\\n%s\\n\\n%s\\n" % (cmd, err))\n+    #TODO - call clean up?\n+    handle.write("Error invoking command:\\n%s\\n\\n%s\\n" % (cmd, err))\n+    handle.close()\n+    sys.exit(1)\n+#Use .communicate as can get deadlocks with .wait(),\n+stdout, stderr = child.communicate()\n+assert not stdout and not stderr #Should be empty as sent to handle\n+run_time = time.time() - start_time\n+return_code = child.returncode\n+handle.write("\\n")\n+handle.write("============================ MIRA has finished ===============================\\n")\n+handle.write("MIRA took %0.2f hours\\n" % (run_time / 3600.0))\n+if return_code:\n+    print "MIRA took %0.2f hours" % (run_time / 3600.0)\n+    handle.write("Return error code %i from command:\\n" % return_code)\n+    handle.write(cmd + "\\n")\n+    handle.close()\n+    clean_up(temp, name)\n+    log_manifest(manifest)\n+    stop_err("Return error code %i from command:\\n%s" % (return_code, cmd),\n+             return_code)\n+handle.flush()\n+\n+if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"):\n+    handle.write("\\n")\n+    handle.write("====================== Extract Large Contigs failed ==========================\\n")\n+    e = open("MIRA_assembly/MIRA_d_results/ec.log", "rU")\n+    for line in e:\n+        handle.write(line)\n+    e.close()\n+    handle.write("============================ (end of ec.log) =================================\\n")\n+    handle.flush()\n+\n+#print "Collecting output..."\n+start_time = time.time()\n+collect_output(temp, name, handle)\n+collect_time = time.time() - start_time\n+handle.write("MIRA took %0.2f hours; collecting output %0.2f minutes\\n" % (run_time / 3600.0, collect_time / 60.0))\n+print("MIRA took %0.2f hours; collecting output %0.2f minutes\\n" % (run_time / 3600.0, collect_time / 60.0))\n+\n+if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"):\n+    #Treat as an error, but doing this AFTER 
collect_output\n+    sys.stderr.write("Extract Large Contigs failed\\n")\n+    handle.write("Extract Large Contigs failed\\n")\n+    handle.close()\n+    sys.exit(1)\n+\n+#print "Cleaning up..."\n+clean_up(temp, name)\n+\n+handle.write("\\nDone\\n")\n+handle.close()\n+print("Done")\n'
b
diff -r 000000000000 -r 6a88b42ce6b9 tools/mira4/mira4_bait.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/mira4/mira4_bait.py Fri Nov 21 06:42:56 2014 -0500
[
@@ -0,0 +1,115 @@
+#!/usr/bin/env python
+"""A simple wrapper script to call MIRA4's mirabait and collect its output.
+"""
+import os
+import sys
+import subprocess
+import shutil
+import time
+
+WRAPPER_VER = "0.0.1" #Keep in sync with the XML file
+
+def stop_err(msg, err=1):
+    sys.stderr.write(msg+"\n")
+    sys.exit(err)
+
+
+def get_version(mira_binary):
+    """Run MIRA to find its version number"""
+    # At the commend line I would use: mira -v | head -n 1
+    # however there is some pipe error when doing that here.
+    cmd = [mira_binary, "-v"]
+    try:
+        child = subprocess.Popen(cmd,
+                                 stdout=subprocess.PIPE,
+                                 stderr=subprocess.STDOUT)
+    except Exception, err:
+        sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
+        sys.exit(1)
+    ver, tmp = child.communicate()
+    del child
+    #Workaround for -v not working in mirabait 4.0RC4
+    if "invalid option" in ver.split("\n", 1)[0]:
+        for line in ver.split("\n", 1):
+            if " version " in line:
+                line = line.split()
+                return line[line.index("version")+1].rstrip(")")
+        stop_err("Could not determine MIRA version:\n%s" % ver)
+    return ver.split("\n", 1)[0]
+
+try:
+    mira_path = os.environ["MIRA4"]
+except KeyError:
+    stop_err("Environment variable $MIRA4 not set")
+mira_binary = os.path.join(mira_path, "mirabait")
+if not os.path.isfile(mira_binary):
+    stop_err("Missing mirabait under $MIRA4, %r\nFolder contained: %s"
+             % (mira_binary, ", ".join(os.listdir(mira_path))))
+mira_ver = get_version(mira_binary)
+if not mira_ver.strip().startswith("4.0"):
+    stop_err("This wrapper is for MIRA V4.0, not:\n%s" % mira_ver)
+if "-v" in sys.argv or "--version" in sys.argv:
+    print "%s, MIRA wrapper version %s" % (mira_ver, WRAPPER_VER)
+    sys.exit(0)
+
+
+format, output_choice, strand_choice, kmer_length, min_occurance, bait_file, in_file, out_file = sys.argv[1:]
+
+if format.startswith("fastq"):
+    format = "fastq"
+elif format == "mira":
+    format = "maf"
+elif format != "fasta":
+    stop_err("Was not expected format %r" % format)
+
+assert out_file.endswith(".dat")
+out_file_stem = out_file[:-4]
+
+cmd_list = [mira_binary, "-f", format, "-t", format,
+            "-k", kmer_length, "-n", min_occurance,
+            bait_file, in_file, out_file_stem]
+if output_choice == "pos":
+    pass
+elif output_choice == "neg":
+    #Invert the selection...
+    cmd_list.insert(1, "-i")
+else:
+    stop_err("Output choice should be 'pos' or 'neg', not %r" % output_choice)
+if strand_choice == "both":
+    pass
+elif strand_choice == "fwd":
+    #Ingore reverse strand...
+    cmd_list.insert(1, "-r")
+else:
+    stop_err("Strand choice should be 'both' or 'fwd', not %r" % strand_choice)
+
+cmd = " ".join(cmd_list)
+#print cmd
+start_time = time.time()
+try:
+    #Run MIRA
+    child = subprocess.Popen(cmd_list,
+                             stdout=subprocess.PIPE,
+                             stderr=subprocess.STDOUT)
+except Exception, err:
+    log_manifest(manifest)
+    sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err))
+    sys.exit(1)
+#Use .communicate as can get deadlocks with .wait(),
+stdout, stderr = child.communicate()
+assert stderr is None # Due to way we ran with subprocess
+run_time = time.time() - start_time
+return_code = child.returncode
+print "mirabait took %0.2f minutes" % (run_time / 60.0)
+
+if return_code:
+    sys.stderr.write(stdout)
+    stop_err("Return error code %i from command:\n%s" % (return_code, cmd),
+             return_code)
+
+#Capture output
+out_tmp = out_file_stem + "." + format
+if not os.path.isfile(out_tmp):
+    sys.stderr.write(stdout)
+    stop_err("Missing output file from mirabait: %s" % out_tmp)
+shutil.move(out_tmp, out_file)
b
diff -r 000000000000 -r 6a88b42ce6b9 tools/mira4/mira4_bait.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/mira4/mira4_bait.xml Fri Nov 21 06:42:56 2014 -0500
b
@@ -0,0 +1,112 @@
+<tool id="mira_4_0_bait" name="MIRA v4.0 mirabait" version="0.0.3">
+    <description>Filter reads using kmer matches</description>
+    <requirements>
+        <requirement type="binary">mirabait</requirement>
+        <requirement type="package" version="4.0">MIRA</requirement>
+    </requirements>
+    <version_command interpreter="python">mira4_bait.py --version</version_command>
+    <command interpreter="python">
+mira4_bait.py $input_reads.ext $output_choice $strand_choice $kmer_length $min_occurence "$bait_file" "$input_reads" "$output_reads"
+    </command>
+    <stdio>
+        <!-- Assume anything other than zero is an error -->
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+    </stdio>
+    <inputs>
+        <param name="bait_file" type="data" format="fasta,fastq,mira" required="true" label="Bait file (what to look for)" />
+        <param name="input_reads" type="data" format="fasta,fastq,mira" required="true" label="Reads to search" />
+        <param name="output_choice" type="select" label="Output positive matches, or negative matches?">
+            <option value="pos">Just positive matches</option>
+            <option value="neg">Just negative matches</option>
+        </param>
+        <param name="strand_choice" type="select" label="Check for matches on both strands?">
+            <option value="both">Check both strands</option>
+            <option value="fwd">Just forward strand</option>
+        </param>
+        <param name="kmer_length" type="integer" value="31" min="1" max="32"
+               label="k-mer length" help="Maximum 32" />
+        <param name="min_occurence" type="integer" value="1" min="1"
+               label="Minimum k-mer occurrence"
+               help="How many k-mer matches do you want per read? Minimum one" />
+    </inputs>
+    <outputs>
+        <data name="output_reads" format="input" metadata_source="input_reads"
+       label="$input_reads.name #if str($output_choice)=='pos' then 'matching' else 'excluding matches to' # $bait_file.name"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="bait_file" value="tvc_bait.fasta" ftype="fasta" />
+            <param name="input_reads" value="tvc_mini.fastq" ftype="fastqsanger" />
+            <output name="output_reads" file="tvc_mini_bait_pos.fastq" ftype="fastqsanger" />
+        </test>
+        <test>
+            <param name="bait_file" value="tvc_bait.fasta" ftype="fasta" />
+            <param name="input_reads" value="tvc_mini.fastq" ftype="fastqsanger" />
+            <param name="kmer_length" value="32" />
+            <param name="min_occurence" value="50" />
+            <output name="output_reads" file="tvc_mini_bait_strict.fastq" ftype="fastqsanger" />
+        </test>
+        <test>
+            <param name="bait_file" value="tvc_bait.fasta" ftype="fasta" />
+            <param name="input_reads" value="tvc_mini.fastq" ftype="fastqsanger" />
+            <param name="output_choice" value="neg" />
+            <output name="output_reads" file="tvc_mini_bait_neg.fastq" ftype="fastqsanger" />
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+Runs the ``mirabait`` utility from MIRA v4.0 to filter your input reads
+according to whether or not they contain perfect kmer matches to your
+bait file. By default this looks for 31-mers (kmers or *k*-mers where
+the fragment length *k* is 31), and only requires a single matching kmer.
+
+The ``mirabait`` utility is useful in many applications and pipelines
+outside of using the main MIRA tool for assembly or mapping.
+
+.. class:: warningmark
+
+Note ``mirabait`` cannot be used on protein (amino acid) sequences.
+
+**Example Usage**
+
+To remove over abundant entries like rRNA sequences, run ``mirabait`` with
+known rRNA sequences as the bait and select the *negative* matches.
+
+To do targeted assembly by fishing out reads belonging to a gene and just
+assemble these, run ``mirabait`` with the gene of interest as the bait and
+select the *positive* matches.
+
+To iteratively reconstruct mitochondria you could start by fishing out reads
+matching any known mitochondrial sequence, assemble those, and repeat.
+
+
+**Notes on paired reads**
+
+.. class:: warningmark
+
+While MIRA4 is aware of many read naming conventions to identify paired read
+partners, the ``mirabait`` tool considers each read in isolation. Applying
+it to paired read files may leave you with orphaned reads.
+
+
+**Citation**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers:
+
+Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
+Galaxy tools and workflows for sequence analysis with applications
+in molecular plant pathology. PeerJ 1:e167
+http://dx.doi.org/10.7717/peerj.167
+
+Bastien Chevreux, Thomas Wetter and Sándor Suhai (1999).
+Genome Sequence Assembly Using Trace Signals and Additional Sequence Information.
+Computer Science and Biology: Proceedings of the German Conference on Bioinformatics (GCB) 99, pp. 45-56.
+http://www.bioinfo.de/isb/gcb99/talks/chevreux/main.html
+
+This wrapper is available to install into other Galaxy Instances via the Galaxy
+Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/mira4_assembler
+    </help>
+</tool>
b
diff -r 000000000000 -r 6a88b42ce6b9 tools/mira4/mira4_convert.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/mira4/mira4_convert.py Fri Nov 21 06:42:56 2014 -0500
[
@@ -0,0 +1,225 @@
+#!/usr/bin/env python
+"""A simple wrapper script to call MIRA and collect its output.
+
+This focuses on the miraconvert binary.
+"""
+import os
+import sys
+import subprocess
+import shutil
+import time
+import tempfile
+from optparse import OptionParser
+try:
+    from io import BytesIO
+except ImportError:
+    #Should we worry about Python 2.5 or older?
+    from StringIO import StringIO as BytesIO
+
+#Do we need any PYTHONPATH magic?
+from mira4_make_bam import depad
+
+WRAPPER_VER = "0.0.5" #Keep in sync with the XML file
+
+def stop_err(msg, err=1):
+    sys.stderr.write(msg+"\n")
+    sys.exit(err)
+
+def run(cmd):
+    #Avoid using shell=True when we call subprocess to ensure if the Python
+    #script is killed, so too is the child process.
+    try:
+        child = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    except Exception, err:
+        stop_err("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
+    #Use .communicate as can get deadlocks with .wait(),
+    stdout, stderr = child.communicate()
+    return_code = child.returncode
+    if return_code:
+        if stderr and stdout:
+            stop_err("Return code %i from command:\n%s\n\n%s\n\n%s" % (return_code, err, stdout, stderr))
+        else:
+            stop_err("Return code %i from command:\n%s\n%s" % (return_code, err, stderr))
+
+def get_version(mira_binary):
+    """Run MIRA to find its version number"""
+    # At the commend line I would use: mira -v | head -n 1
+    # however there is some pipe error when doing that here.
+    cmd = [mira_binary, "-v"]
+    try:
+        child = subprocess.Popen(cmd,
+                                 stdout=subprocess.PIPE,
+                                 stderr=subprocess.STDOUT)
+    except Exception, err:
+        sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
+        sys.exit(1)
+    ver, tmp = child.communicate()
+    del child
+    return ver.split("\n", 1)[0].strip()
+
+#Parse Command Line
+#(the %s in the usage text is filled in with WRAPPER_VER)
+usage = """Galaxy MIRA4 wrapper script v%s - use as follows:
+
+$ python mira4_convert.py ...
+
+This will run the MIRA miraconvert binary and collect its output files as directed.
+""" % WRAPPER_VER
+parser = OptionParser(usage=usage)
+parser.add_option("--input", dest="input",
+                  default=None, metavar="FILE",
+                  help="MIRA input filename")
+#Contig filter thresholds; held as strings here, with "0" as the default
+parser.add_option("-x", "--min_length", dest="min_length",
+                  default="0",
+                  help="Minimum contig length")
+parser.add_option("-y", "--min_cover", dest="min_cover",
+                  default="0",
+                  help="Minimum average contig coverage")
+parser.add_option("-z", "--min_reads", dest="min_reads",
+                  default="0",
+                  help="Minimum reads per contig")
+#Output filenames; each defaults to the empty string
+parser.add_option("--maf", dest="maf",
+                  default="", metavar="FILE",
+                  help="MIRA MAF output filename")
+parser.add_option("--ace", dest="ace",
+                  default="", metavar="FILE",
+                  help="ACE output filename")
+parser.add_option("--bam", dest="bam",
+                  default="", metavar="FILE",
+                  help="Unpadded BAM output filename")
+parser.add_option("--fasta", dest="fasta",
+                  default="", metavar="FILE",
+                  help="Unpadded FASTA output filename")
+parser.add_option("--cstats", dest="cstats",
+                  default="", metavar="FILE",
+                  help="Contig statistics filename")
+parser.add_option("-v", "--version", dest="version",
+                  default=False, action="store_true",
+                  help="Show version and quit")
+options, args = parser.parse_args()
+#Everything must be given as an option; positional arguments are rejected
+if args:
+    stop_err("Expected options (e.g. --input example.maf), not arguments")
+
+#Copy the parsed filenames into module level names
+input_maf = options.input
+out_maf = options.maf
+out_bam = options.bam
+out_fasta = options.fasta
+out_ace = options.ace
+out_cstats = options.cstats
+
+try:
+    mira_path = os.environ["MIRA4"]
+except KeyError:
+    stop_err("Environment variable $MIRA4 not set")
+mira_convert = os.path.join(mira_path, "miraconvert")
+if not os.path.isfile(mira_convert):
+    stop_err("Missing miraconvert under $MIRA4, %r\nFolder contained: %s"
+             % (mira_convert, ", ".join(os.listdir(mira_path))))
+
+mira_convert_ver = get_version(mira_convert)
+if not mira_convert_ver.strip().startswith("4.0"):
+    stop_err("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_convert))
+if options.version:
+    print "%s, MIRA wrapper version %s" % (mira_convert_ver, WRAPPER_VER)
+    sys.exit(0)
+
+if not input_maf:
+    stop_err("Input MIRA file is required")
+elif not os.path.isfile(input_maf):
+    stop_err("Missing input MIRA file: %r" % input_maf)
+
+if not (out_maf or out_bam or out_fasta or out_ace or out_cstats):
+    stop_err("No output requested")
+
+
+def check_min_int(value, name):
+    try:
+        i = int(value)
+    except:
+        stop_err("Bad %s setting, %r" % (name, value))
+    if i < 0:
+        stop_err("Negative %s setting, %r" % (name, value))
+    return i
+
+min_length = check_min_int(options.min_length, "minimum length")
+min_cover = check_min_int(options.min_cover, "minimum cover")
+min_reads = check_min_int(options.min_reads, "minimum reads")
+
+#TODO - Run MIRA in /tmp or a configurable directory?
+#Currently Galaxy puts us somewhere safe like:
+#/opt/galaxy-dist/database/job_working_directory/846/
+temp = "."
+
+
+cmd_list = [mira_convert]
+if min_length:
+    cmd_list.extend(["-x", str(min_length)])
+if min_cover:
+    cmd_list.extend(["-y", str(min_cover)])
+if min_reads:
+    cmd_list.extend(["-z", str(min_reads)])
+cmd_list.extend(["-f", "maf", input_maf, os.path.join(temp, "converted")])
+if out_maf:
+    cmd_list.append("maf")
+if out_bam:
+    cmd_list.append("samnbb")
+    if not out_fasta:
+        #Need this for samtools depad
+        out_fasta = os.path.join(temp, "depadded.fasta")
+if out_fasta:
+    cmd_list.append("fasta")
+if out_ace:
+    cmd_list.append("ace")
+if out_cstats:
+    cmd_list.append("cstats")
+run(cmd_list)
+
+def collect(old, new):
+    if not os.path.isfile(old):
+        stop_err("Missing expected output file %s" % old)
+    shutil.move(old, new)
+
+if out_maf:
+    collect(os.path.join(temp, "converted.maf"), out_maf)
+if out_fasta:
+    #Can we look at the MAF file to see if there are multiple strains?
+    old = os.path.join(temp, "converted_AllStrains.unpadded.fasta")
+    if os.path.isfile(old):
+        collect(old, out_fasta)
+    else:
+        #Might the output be filtered down to zero contigs?
+        old = os.path.join(temp, "converted.fasta")
+        if not os.path.isfile(old):
+            stop_err("Missing expected output FASTA file")
+        elif os.path.getsize(old) == 0:
+            print("Warning - no contigs (harsh filters?)")
+            collect(old, out_fasta)
+        else:
+            stop_err("Missing expected output FASTA file (only generic file present)")
+if out_ace:
+    collect(os.path.join(temp, "converted.maf"), out_ace)
+if out_cstats:
+    collect(os.path.join(temp, "converted_info_contigstats.txt"), out_cstats)
+
+if out_bam:
+    assert os.path.isfile(out_fasta)
+    old = os.path.join(temp, "converted.samnbb")
+    if not os.path.isfile(old):
+        old = os.path.join(temp, "converted.sam")
+    if not os.path.isfile(old):
+        stop_err("Missing expected intermediate file %s" % old)
+    h = BytesIO()
+    msg = depad(out_fasta, old, out_bam, h)
+    if msg:
+        print(msg)
+        print(h.getvalue())
+        h.close()
+        sys.exit(1)
+    h.close()
+    if out_fasta == os.path.join(temp, "depadded.fasta"):
+        #Not asked for by Galaxy, no longer needed
+        os.remove(out_fasta)
+
+if min_length or min_cover or min_reads:
+    print("Filtered.")
+else:
+    print("Converted.")
b
diff -r 000000000000 -r 6a88b42ce6b9 tools/mira4/mira4_convert.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/mira4/mira4_convert.xml Fri Nov 21 06:42:56 2014 -0500
b
@@ -0,0 +1,114 @@
+<tool id="mira_4_0_convert" name="MIRA v4.0 miraconvert" version="0.0.5">
+    <description>Convert MIRA assembly to FASTA/SAM/BAM</description>
+    <requirements>
+        <requirement type="binary">miraconvert</requirement>
+        <requirement type="package" version="4.0">MIRA</requirement>
+    </requirements>
+    <version_command interpreter="python">mira4_convert.py --version</version_command>
+    <command interpreter="python">
+mira4_convert.py
+--input "$mira_file"
+--min_length $min_length
+--min_cover $min_cover
+--min_reads $min_reads
+#if str($maf_wanted)=="true":
+--maf "$out_maf"
+#end if
+#if str($fasta_wanted)=="true":
+--fasta "$out_fasta"
+#end if
+#if str($bam_wanted)=="true":
+--bam "$out_bam"
+#end if
+##Don't yet have a Galaxy datatype defined for ace:
+## #if str($ace_wanted)=="true":
+## --ace "$out_ace"
+## #end if
+#if str($cstats_wanted)=="true":
+--cstats "$out_cstats"
+#end if
+    </command>
+    <stdio>
+        <!-- Assume anything other than zero is an error -->
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+    </stdio>
+    <inputs>
+        <param name="mira_file" type="data" format="mira" required="true" label="MIRA Assembly Format input" />
+        <!-- TODO - top level select for contig versus read output? Or two Galaxy tools in different XML files? -->
+        <param name="min_length" type="integer" required="false" value="0" min="0"
+               label="Minimum contig length"
+               help="e.g. Set to 1000 to exclude small contigs. Default is to keep all contigs (minimum zero)" />
+        <param name="min_cover" type="integer" required="false" value="0" min="0"
+               label="Minimum average contig coverage"
+               help="e.g. Set to 10 to exclude low coverage contigs. Default is to keep all contigs (minimum zero)" />
+        <param name="min_reads" type="integer" required="false" value="0" min="0"
+               label="Minimum reads per contig"
+               help="e.g. Set to 5 to exclude low coverage contigs with only a few reads. Default is to keep all contigs (minimum zero)." />
+        <param name="maf_wanted" type="boolean" label="Output assembly in MIRA's own format? (useful if filtering)" checked="False" />
+        <param name="fasta_wanted" type="boolean" label="Convert assembly into (unpadded) FASTA?" checked="True" />
+        <param name="bam_wanted" type="boolean" label="Convert assembly into (unpadded) BAM format?" checked="False" />
+        <!-- Don't yet have a Galaxy datatype defined for ace:
+        <param name="ace_wanted" type="boolean" label="Convert assembly in ACE format?" checked="False" />
+        -->
+        <param name="cstats_wanted" type="boolean" label="Assembly statistics file?" checked="False" />
+    </inputs>
+    <outputs>
+        <data name="out_maf" format="mira" label="$mira_file.name (filtered)">
+              <filter>maf_wanted is True</filter>
+        </data>
+        <data name="out_fasta" format="fasta" label="$mira_file.name (as FASTA)">
+              <filter>fasta_wanted is True</filter>
+        </data>
+        <data name="out_bam" format="bam" label="$mira_file.name (as BAM)">
+              <filter>bam_wanted is True</filter>
+        </data>
+        <!--
+        <data name="out_ace" format="ace" label="$mira_file.name (as ACE)">
+            <filter>ace_wanted is True</filter>
+        </data>
+        -->
+        <data name="out_cstats" format="tabular" label="$mira_file.name (filtered stats)">
+              <filter>cstats_wanted is True</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- TODO -->
+    </tests>
+    <help>
+**What it does**
+
+Runs the ``miraconvert`` utility from MIRA v4.0 to filter and/or convert
+a MIRA Assembly Format file produced by a *mapping* or *de novo* assembly.
+
+**Example Usage**
+
+You want to remove all the low coverage contigs from a transcriptome
+assembly to focus on those with higher coverage.
+
+You want to convert your MIRA assembly into SAM/BAM to run a standard
+SNP finding tool.
+
+You've lost the FASTA consensus from your MIRA assembly and need to
+regenerate it.
+
+
+**Citation**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers:
+
+Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
+Galaxy tools and workflows for sequence analysis with applications
+in molecular plant pathology. PeerJ 1:e167
+http://dx.doi.org/10.7717/peerj.167
+
+Bastien Chevreux, Thomas Wetter and Sándor Suhai (1999).
+Genome Sequence Assembly Using Trace Signals and Additional Sequence Information.
+Computer Science and Biology: Proceedings of the German Conference on Bioinformatics (GCB) 99, pp. 45-56.
+http://www.bioinfo.de/isb/gcb99/talks/chevreux/main.html
+
+This wrapper is available to install into other Galaxy Instances via the Galaxy
+Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/mira4_assembler
+    </help>
+</tool>
b
diff -r 000000000000 -r 6a88b42ce6b9 tools/mira4/mira4_de_novo.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/mira4/mira4_de_novo.xml Fri Nov 21 06:42:56 2014 -0500
b
b'@@ -0,0 +1,263 @@\n+<tool id="mira_4_0_de_novo" name="MIRA v4.0 de novo assember" version="0.0.4">\n+    <description>Takes Sanger, Roche 454, Solexa/Illumina, Ion Torrent and PacBio reads</description>\n+    <requirements>\n+        <requirement type="binary">mira</requirement>\n+        <requirement type="binary">miraconvert</requirement>\n+        <requirement type="package" version="4.0">MIRA</requirement>\n+        <requirement type="binary">samtools</requirement>\n+        <requirement type="package" version="0.1.19">samtools</requirement>\n+    </requirements>\n+    <version_command interpreter="python">mira4.py --version</version_command>\n+    <command interpreter="python">mira4.py\n+--manifest "$manifest"\n+#if str($maf_wanted)=="true":\n+--maf "$out_maf"\n+#end if\n+#if str($bam_wanted)=="true":\n+--bam "$out_bam"\n+#end if\n+--fasta "$out_fasta"\n+--log "$out_log"\n+    </command>\n+    <stdio>\n+        <!-- Assume anything other than zero is an error -->\n+        <exit_code range="1:" />\n+        <exit_code range=":-1" />\n+    </stdio>\n+    <inputs>\n+        <param name="job_type" type="select" label="Assembly type">\n+            <option value="genome">Genome</option>\n+            <option value="est">EST (transcriptome)</option>\n+        </param>\n+        <param name="job_quality" type="select" label="Assembly quality grade">\n+            <option value="accurate">Accurate</option>\n+            <option value="draft">Draft</option>\n+        </param>\n+        <repeat name="read_group" title="Read Group" min="1">\n+            <param name="technology" type="select" label="Read technology">\n+                <option value="solexa">Solexa/Illumina</option>\n+                <option value="sanger">Sanger cappillary sequencing</option>\n+                <option value="454">Roche 454</option>\n+                <option value="iontor">Ion Torrent</option>\n+                <option value="pcbiolq">PacBio low quality (raw)</option>\n+                
<option value="pcbiohq">PacBio high quality (corrected)</option>\n+                <option value="text">Synthetic reads (database entries, consensus sequences, artifical reads, etc)</option>\n+                <!-- TODO reference/backbone as an entry here? -->\n+            </param>\n+            <conditional name="segments">\n+                <param name="type" type="select" label="Are these paired reads?">\n+                    <option value="paired">Paired reads</option>\n+                    <option value="none">Single reads or not relevant (e.g. primer walking with Sanger capillary sequencing)</option>\n+                </param>\n+                <when value="paired">\n+                    <param name="placement" type="select" label="Pairing type (segment placing)">\n+                        <option value="FR">---&gt; &lt;--- (e.g. Sanger capillary or Solexa/Illumina paired-end library)</option>\n+                        <option value="RF">&lt;--- ---&gt; (e.g. Solexa/Illumina mate-pair library)</option>\n+                        <option value="SB">2---&gt; 1---&gt; (e.g. Roche 454 paired-end libraries or IonTorrent long-mate; see note)</option>\n+                    </param>\n+                    <!-- min/max validation is done via the <code> tag -->\n+                    <param name="min_size" type="integer" optional="true" min="0" value=""\n+                           label="Minimum size of \'good\' DNA templates in the library preparation"\n+                           help="Optional, but if used you must also supply a maximum value." /> \n+                    <param name="max_size" type="integer" optional="true" min="0" value=""\n+                           label="Maximum size of \'good\' DNA templates in the library preparation"\n+                           help="Optional, but if used you must also supply a minimum value." 
/>\n+                    <param name="naming" type="select" label="Pair naming convention">\n+                        <option value="solexa">Solexa/Illumina (using \'/1\' and \'/2\' suffixes, or later Illumina c'..b'ger" />\n+            <param name="maf_wanted" value="true"/>\n+            <param name="bam_wanted" value="true"/>\n+            <output name="out_fasta" file="U13small_m.mira4_de_novo.fasta" ftype="fasta" />\n+            <output name="out_bam" file="empty_file.dat" compare="contains" />\n+            <!-- TODO: Suggest startswith as a compare method? -->\n+            <output name="out_maf" file="header.mira" compare="contains" />\n+            <output name="out_log" file="empty_file.dat" compare="contains" />\n+        </test>\n+        <!-- Simple assembly based on MIRA\'s minidemo/solexa1 example\n+             Note we\'re using just one repeat group,\n+             but two parameters within the repeat (filename, no pairing)\n+         -->\n+        <test>\n+            <param name="job_type" value="genome" />\n+            <param name="job_quality" value="accurate" />\n+            <param name="type" value="none" />\n+            <param name="filenames" value="ecoli.fastq" ftype="fastqsanger" />\n+            <param name="maf_wanted" value="false"/>\n+            <param name="bam_wanted" value="false"/>\n+            <output name="out_fasta" file="ecoli.mira4_de_novo.fasta" ftype="fasta" />\n+            <output name="out_log" file="empty_file.dat" compare="contains" />\n+        </test>\n+    </tests>\n+    <help>\n+\n+**What it does**\n+\n+Runs MIRA v4.0 in de novo mode, collects the output, generates a sorted BAM\n+file, and then throws away all the temporary files.\n+\n+MIRA is an open source assembly tool capable of handling sequence data from\n+a range of platforms (Sanger capillary, Solexa/Illumina, Roche 454, Ion Torrent\n+and also PacBio).\n+\n+It is particularly suited to small genomes such as bacteria.\n+\n+\n+**Notes on paired 
reads**\n+\n+.. class:: warningmark\n+\n+MIRA uses read naming conventions to identify paired read partners\n+(and does not care about their order in the input files). In most cases,\n+the Solexa/Illumina setting is fine. For Sanger capillary sequencing,\n+you may need to rename your reads to match one of the standard conventions\n+supported by MIRA. For Roche 454 or Ion Torrent the appropriate settings\n+depend on how the FASTQ file was produced:\n+\n+* If using Roche\'s ``sffinfo`` or older versions of ``sff_extract``\n+  to convert SFF files to FASTQ, your reads will probably have the\n+  ``---&gt; &lt;---`` orientation and use the ``.f`` and ``.r``\n+  suffixes (FR naming).\n+\n+* If using a recent version of ``sff_extract``, then the ``/1`` and ``/2``\n+  suffixes are used (Solexa/Illumina style naming) and the original\n+  ``2---&gt; 1---&gt;`` orientation is preserved.\n+\n+The reason for this is the raw data for Roche 454 and Ion Torrent paired-end\n+libraries sequences a circularised fragment such that the raw data begins\n+with the end of the fragment, a linker, then the start of the fragment.\n+This means both the start and end are sequenced from the same strand, and\n+have the orientation ``2---&gt; 1---&gt;``. However, in order to use the data\n+with traditional tools expecting Sanger capillary style ``---&gt; &lt;---``\n+orientation it was common to reverse complement one of the pair to mimic this.\n+\n+\n+**Citation**\n+\n+If you use this Galaxy tool in work leading to a scientific publication please\n+cite the following papers:\n+\n+Peter J.A. Cock, Bj\xc3\xb6rn A. Gr\xc3\xbcning, Konrad Paszkiewicz and Leighton Pritchard (2013).\n+Galaxy tools and workflows for sequence analysis with applications\n+in molecular plant pathology. 
PeerJ 1:e167\n+http://dx.doi.org/10.7717/peerj.167\n+\n+Bastien Chevreux, Thomas Wetter and S\xc3\xa1ndor Suhai (1999).\n+Genome Sequence Assembly Using Trace Signals and Additional Sequence Information.\n+Computer Science and Biology: Proceedings of the German Conference on Bioinformatics (GCB) 99, pp. 45-56.\n+http://www.bioinfo.de/isb/gcb99/talks/chevreux/main.html\n+\n+This wrapper is available to install into other Galaxy Instances via the Galaxy\n+Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/mira4_assembler\n+    </help>\n+</tool>\n'
b
diff -r 000000000000 -r 6a88b42ce6b9 tools/mira4/mira4_make_bam.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/mira4/mira4_make_bam.py Fri Nov 21 06:42:56 2014 -0500
[
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+"""Wrapper script using miraconvert & samtools to get BAM from MIRA.
+"""
+import os
+import sys
+import shutil
+import subprocess
+import tempfile
+
+def stop_err(msg, err=1):
+    """Report msg (plus a trailing newline) on stderr, then exit with status err."""
+    text = msg + "\n"
+    sys.stderr.write(text)
+    raise SystemExit(err)
+
+def run(cmd, log_handle):
+    """Run a shell command and copy its combined stdout/stderr to a log.
+
+    Arguments:
+     - cmd - command line (string), executed via the shell
+     - log_handle - writable handle which receives the command's output
+
+    Returns the return code of the child process. If the command cannot
+    be launched at all, the error is written to both stderr and the log
+    handle and the script exits with status 1.
+
+    Output over 10000 characters is abbreviated to its first ten and last
+    ten lines, provided it has more than twenty lines to begin with.
+    """
+    try:
+        # universal_newlines=True gives text (not bytes) output, so the
+        # string handling below works on both Python 2 and Python 3.
+        child = subprocess.Popen(cmd, shell=True,
+                                 stdout=subprocess.PIPE,
+                                 stderr=subprocess.STDOUT,
+                                 universal_newlines=True)
+    except Exception as err:
+        msg = "Error invoking command:\n%s\n\n%s\n" % (cmd, err)
+        sys.stderr.write(msg)
+        #TODO - call clean up?
+        log_handle.write(msg)
+        sys.exit(1)
+    #Use .communicate as can get deadlocks with .wait(),
+    stdout, stderr = child.communicate()
+    assert not stderr #Should be empty as sent to stdout
+    if len(stdout) > 10000:
+        #miraconvert can be very verbose (is holding stdout in RAM a problem?)
+        lines = stdout.split("\n")
+        if len(lines) > 20:
+            # Only snip when the head and tail slices do not overlap,
+            # otherwise lines would be duplicated in the log.
+            lines = lines[:10] + ["...", "<snip>", "..."] + lines[-10:]
+            stdout = "\n".join(lines)
+    log_handle.write(stdout)
+    return child.returncode
+
+def depad(fasta_file, sam_file, bam_file, log_handle):
+    """Turn a padded SAM file into a sorted, depadded BAM file.
+
+    Arguments:
+     - fasta_file - padded reference FASTA file (passed to samtools depad -T)
+     - sam_file - input SAM file
+     - bam_file - filename for the final BAM file
+     - log_handle - writable handle for progress banners and tool output
+
+    Returns None on success, or an error message (string) on failure.
+    Intermediate files (bam_file + ".tmp.bam" and its ".bai" index) are
+    removed whether or not the conversion worked.
+    """
+    log_handle.write("\n================= Converting MIRA assembly from SAM to BAM ===================\n")
+    #Also doing SAM to (uncompressed) BAM during depad
+    bam_stem = bam_file + ".tmp" # Have write permissions and want final file in this folder
+    tmp_bam = bam_stem + ".bam"
+    tmp_bai = tmp_bam + ".bai"
+    try:
+        # NOTE(review): the return code of a shell pipeline is that of the
+        # last command, so a failure in 'samtools depad' alone would have to
+        # be caught by the missing file check below - confirm this suffices.
+        cmd = 'samtools depad -S -u -T "%s" "%s" | samtools sort - "%s"' % (fasta_file, sam_file, bam_stem)
+        return_code = run(cmd, log_handle)
+        if return_code:
+            return "Error %i from command:\n%s" % (return_code, cmd)
+        if not os.path.isfile(tmp_bam):
+            return "samtools depad or sort failed to produce BAM file"
+
+        log_handle.write("\n====================== Indexing MIRA assembly BAM file =======================\n")
+        cmd = 'samtools index "%s"' % tmp_bam
+        return_code = run(cmd, log_handle)
+        if return_code:
+            return "Error %i from command:\n%s" % (return_code, cmd)
+        if not os.path.isfile(tmp_bai):
+            return "samtools indexing of BAM file failed to produce BAI file"
+
+        shutil.move(tmp_bam, bam_file)
+        return None
+    finally:
+        # The index is always discarded (let Galaxy handle that...), and on
+        # any failure the partial BAM file should not be left behind either.
+        for leftover in (tmp_bam, tmp_bai):
+            if os.path.isfile(leftover):
+                os.remove(leftover)
+
+
+def make_bam(mira_convert, maf_file, fasta_file, bam_file, log_handle):
+    """Convert a MIRA assembly (MAF file) into a sorted BAM file.
+
+    Arguments:
+     - mira_convert - path to the miraconvert binary
+     - maf_file - input MIRA assembly file
+     - fasta_file - padded FASTA file, used when depadding the SAM file
+     - bam_file - filename for the BAM output
+     - log_handle - writable handle for progress banners and tool output
+
+    Returns None on success, or an error message (string) on failure.
+    The temporary working directory is removed in either case.
+    """
+    if not os.path.isfile(mira_convert):
+        return "Missing binary %r" % mira_convert
+    if not os.path.isfile(maf_file):
+        return "Missing input MIRA file: %r" % maf_file
+    if not os.path.isfile(fasta_file):
+        return "Missing padded FASTA file: %r" % fasta_file
+
+    log_handle.write("\n====================== Converting MIRA assembly to SAM =======================\n")
+    tmp_dir = tempfile.mkdtemp()
+    try:
+        sam_file = os.path.join(tmp_dir, "x.sam")
+
+        # Note add nbb to the template name, possible MIRA 4.0 RC4 bug
+        cmd = '"%s" -f maf -t samnbb "%s" "%snbb"' % (mira_convert, maf_file, sam_file)
+        return_code = run(cmd, log_handle)
+        if return_code:
+            return "Error %i from command:\n%s" % (return_code, cmd)
+        if not os.path.isfile(sam_file):
+            return "Conversion from MIRA to SAM failed"
+
+        #Also doing SAM to (uncompressed) BAM during depad
+        msg = depad(fasta_file, sam_file, bam_file, log_handle)
+        if msg:
+            return msg
+
+        return None #Good :)
+    finally:
+        # Clean up on every exit path (the error returns above used to leak
+        # the folder), and cope with any extra files left in it.
+        shutil.rmtree(tmp_dir, ignore_errors=True)
+
+if __name__ == "__main__":
+    # Command line: miraconvert binary, MAF file, padded FASTA file, BAM output.
+    # Progress and tool output go to stdout; any failure goes to stderr with
+    # a non-zero exit status.
+    if len(sys.argv) != 5:
+        stop_err("Expected 4 arguments (miraconvert binary, MAF file, "
+                 "padded FASTA file, output BAM file), got %i"
+                 % (len(sys.argv) - 1))
+    mira_convert, maf_file, fasta_file, bam_file = sys.argv[1:]
+    msg = make_bam(mira_convert, maf_file, fasta_file, bam_file, sys.stdout)
+    if msg:
+        stop_err(msg)
b
diff -r 000000000000 -r 6a88b42ce6b9 tools/mira4/mira4_mapping.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/mira4/mira4_mapping.xml Fri Nov 21 06:42:56 2014 -0500
b
b'@@ -0,0 +1,267 @@\n+<tool id="mira_4_0_mapping" name="MIRA v4.0 mapping" version="0.0.4">\n+    <description>Maps Sanger, Roche 454, Solexa/Illumina, Ion Torrent and PacBio reads</description>\n+    <requirements>\n+        <requirement type="binary">mira</requirement>\n+        <requirement type="binary">miraconvert</requirement>\n+        <requirement type="package" version="4.0">MIRA</requirement>\n+        <requirement type="binary">samtools</requirement>\n+        <requirement type="package" version="0.1.19">samtools</requirement>\n+    </requirements>\n+    <version_command interpreter="python">mira4.py --version</version_command>\n+    <command interpreter="python">mira4.py\n+--manifest "$manifest"\n+#if str($maf_wanted) == "true":\n+--maf "$out_maf"\n+#end if\n+#if str($bam_wanted) == "true":\n+--bam "$out_bam"\n+#end if\n+--fasta "$out_fasta"\n+--log "$out_log"\n+    </command>\n+    <stdio>\n+        <!-- Assume anything other than zero is an error -->\n+        <exit_code range="1:" />\n+        <exit_code range=":-1" />\n+    </stdio>\n+    <inputs>\n+        <param name="job_type" type="select" label="Assembly type">\n+            <option value="genome">Genome</option>\n+            <option value="est">EST (transcriptome)</option>\n+        </param>\n+        <param name="job_quality" type="select" label="Assembly quality grade">\n+            <option value="accurate">Accurate</option>\n+            <option value="draft">Draft</option>\n+        </param>\n+        <!-- TODO? Allow technology type for references? -->\n+        <!-- TODO? Allow strain settings for reference(s) and reads? -->\n+        <!-- TODO? Use a repeat to allow for multi-strain references? -->\n+        <!-- TODO? Add strain to the mapping read groups? 
-->\n+        <param name="references" type="data" format="fasta,fastq,mira" multiple="true" required="true" label="Backbone reference file(s)"\n+               help="Multiple files allowed, for example one FASTA file per chromosome or plasmid." />\n+        <param name="strain_setup" type="select" label="Strain configuration (reference vs reads)">\n+            <option value="default">Different strains - mapping reads onto a related reference (\'StrainX\' vs \'ReferenceStrain\')</option>\n+            <option value="same">Same strain - mapping reads from same reference (all \'StrainX\')</option>\n+        </param>\n+        <repeat name="read_group" title="Read Group" min="1">\n+            <param name="technology" type="select" label="Read technology">\n+                <option value="solexa">Solexa/Illumina</option>\n+                <option value="sanger">Sanger cappillary sequencing</option>\n+                <option value="454">Roche 454</option>\n+                <option value="iontor">Ion Torrent</option>\n+                <option value="pcbiolq">PacBio low quality (raw)</option>\n+                <option value="pcbiohq">PacBio high quality (corrected)</option>\n+                <option value="text">Synthetic reads (database entries, consensus sequences, artifical reads, etc)</option>\n+            </param>\n+            <conditional name="segments">\n+                <param name="type" type="select" label="Are these paired reads?">\n+                    <option value="paired">Paired reads</option>\n+                    <option value="none">Single reads or not relevant (e.g. primer walking with Sanger capillary sequencing)</option>\n+                </param>\n+                <when value="paired">\n+                    <param name="placement" type="select" label="Pairing type (segment placing)">\n+                        <option value="FR">---&gt; &lt;--- (e.g. 
Sanger capillary or Solexa/Illumina paired-end library)</option>\n+                        <option value="RF">&lt;--- ---&gt; (e.g. Solexa/Illumina mate-pair library)</option>\n+                        <option value="SB">2---&gt; 1---&gt; (e.g. Roche 454 paired-end libraries or IonTorrent long-mate; see note)</option>\n+                    </param>\n+                    <param name="naming" type="sele'..b'none" />\n+            <param name="filenames" value="tvc_mini.fastq" ftype="fastqsanger" />\n+            <param name="maf_wanted" value="true"/>\n+            <param name="bam_wanted" value="true"/>\n+            <output name="out_fasta" file="tvc_map_ref_strain.fasta" ftype="fasta" />\n+            <output name="out_bam" file="empty_file.dat" compare="contains" />\n+            <!-- TODO: Suggest startswith as a compare method? -->\n+            <output name="out_maf" file="header.mira" compare="contains" />\n+            <output name="out_log" file="empty_file.dat" compare="contains" />\n+        </test>\n+        <test>\n+            <param name="job_type" value="genome" />\n+            <param name="job_quality" value="accurate" />\n+            <param name="references" value="tvc_contigs.fasta" ftype="fasta" />\n+            <param name="strain_setup" value="same" />\n+            <param name="type" value="none" />\n+            <param name="filenames" value="tvc_mini.fastq" ftype="fastqsanger" />\n+            <param name="maf_wanted" value="false"/>\n+            <param name="bam_wanted" value="false"/>\n+            <output name="out_fasta" file="tvc_map_same_strain.fasta" ftype="fasta" />\n+            <output name="out_log" file="empty_file.dat" compare="contains" />\n+        </test>\n+    </tests>\n+    <help>\n+\n+**What it does**\n+\n+Runs MIRA v4.0 in mapping mode, collects the output, generates a sorted BAM\n+file, and throws away all the temporary files.\n+\n+MIRA is an open source assembly tool capable of handling sequence data from\n+a range 
of platforms (Sanger capillary, Solexa/Illumina, Roche 454, Ion Torrent\n+and also PacBio).\n+\n+It is particularly suited to small genomes such as bacteria.\n+\n+\n+**Notes on paired reads**\n+\n+.. class:: warningmark\n+\n+MIRA uses read naming conventions to identify paired read partners\n+(and does not care about their order in the input files). In most cases,\n+the Solexa/Illumina setting is fine. For Sanger capillary sequencing,\n+you may need to rename your reads to match one of the standard conventions\n+supported by MIRA. For Roche 454 or Ion Torrent the appropriate settings\n+depend on how the FASTQ file was produced:\n+\n+* If using Roche\'s ``sffinfo`` or older versions of ``sff_extract``\n+  to convert SFF files to FASTQ, your reads will probably have the\n+  ``---&gt; &lt;---`` orientation and use the ``.f`` and ``.r``\n+  suffixes (FR naming).\n+\n+* If using a recent version of ``sff_extract``, then the ``/1`` and ``/2``\n+  suffixes are used (Solexa/Illumina style naming) and the original\n+  ``2---&gt; 1---&gt;`` orientation is preserved.\n+\n+The reason for this is the raw data for Roche 454 and Ion Torrent paired-end\n+libraries sequences a circularised fragment such that the raw data begins\n+with the end of the fragment, a linker, then the start of the fragment.\n+This means both the start and end are sequenced from the same strand, and\n+have the orientation ``2---&gt; 1---&gt;``. However, in order to use the data\n+with traditional tools expecting Sanger capillary style ``---&gt; &lt;---``\n+orientation it was common to reverse complement one of the pair to mimic this.\n+\n+\n+**Citation**\n+\n+If you use this Galaxy tool in work leading to a scientific publication please\n+cite the following papers:\n+\n+Peter J.A. Cock, Bj\xc3\xb6rn A. Gr\xc3\xbcning, Konrad Paszkiewicz and Leighton Pritchard (2013).\n+Galaxy tools and workflows for sequence analysis with applications\n+in molecular plant pathology. 
PeerJ 1:e167\n+http://dx.doi.org/10.7717/peerj.167\n+\n+Bastien Chevreux, Thomas Wetter and S\xc3\xa1ndor Suhai (1999).\n+Genome Sequence Assembly Using Trace Signals and Additional Sequence Information.\n+Computer Science and Biology: Proceedings of the German Conference on Bioinformatics (GCB) 99, pp. 45-56.\n+http://www.bioinfo.de/isb/gcb99/talks/chevreux/main.html\n+\n+This wrapper is available to install into other Galaxy Instances via the Galaxy\n+Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/mira4_assembler\n+    </help>\n+</tool>\n'
b
diff -r 000000000000 -r 6a88b42ce6b9 tools/mira4/mira4_validator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/mira4/mira4_validator.py Fri Nov 21 06:42:56 2014 -0500
[
@@ -0,0 +1,64 @@
+#Called from the Galaxy Tool XML file
+#import sys
+
+def _check_size(value, label):
+    """Parse one non-empty size field, returning (size, error message).
+
+    Exactly one of the pair is None: size is a non-negative integer when
+    the value is acceptable, otherwise the message says why it is not.
+    The label ("Minimum" or "Maximum") is used to start the message.
+    """
+    try:
+        size = int(value)
+    except ValueError:
+        return None, "%s size is not an integer (%s)" % (label, value)
+    if size < 0:
+        return None, "%s size must not be negative (%i)" % (label, size)
+    return size, None
+
+
+def validate_input(trans, error_map, param_values, page_param_map):
+    """Validates the min_size/max_size user input, before execution.
+
+    For each entry in param_values["read_group"] whose segments type is
+    "paired", the min_size and max_size fields must be both blank or both
+    non-negative integers, with the minimum no bigger than the maximum.
+
+    Nothing is returned. If any read group has a problem, then
+    error_map["read_group"] is set to a list with one dictionary per read
+    group - empty if that group is fine, otherwise of the form
+    {"segments": {field name: message}}.
+
+    The trans and page_param_map arguments are not used.
+    """
+    err_list = []
+    for read_group in param_values["read_group"]:
+        err = dict()
+        segments = read_group["segments"]
+        if str(segments["type"]) != "paired":
+            err_list.append(dict())
+            continue
+
+        min_size = str(segments["min_size"]).strip()
+        max_size = str(segments["max_size"]).strip()
+
+        #Somehow Galaxy seems to turn an empty field into string "None"...
+        if min_size == "None":
+            min_size = ""
+        if max_size == "None":
+            max_size = ""
+
+        #Both missing is good, but one without the other is not
+        if max_size and not min_size:
+            err["min_size"] = "Minimum size required if maximum size given"
+        elif min_size and not max_size:
+            err["max_size"] = "Maximum size required if minimum size given"
+
+        #These stay as None if blank or invalid, to skip the comparison below
+        min_size_int = None
+        max_size_int = None
+
+        if min_size:
+            min_size_int, msg = _check_size(min_size, "Minimum")
+            if msg:
+                err["min_size"] = msg
+
+        if max_size:
+            max_size_int, msg = _check_size(max_size, "Maximum")
+            if msg:
+                err["max_size"] = msg
+
+        if min_size_int is not None and max_size_int is not None \
+        and min_size_int > max_size_int:
+            msg = "Minimum size must be less than maximum size (%i vs %i)" % (min_size_int, max_size_int)
+            err["min_size"] = msg
+            err["max_size"] = msg
+
+        if err:
+            err_list.append({"segments":err})
+        else:
+            err_list.append(dict())
+
+    if any(err_list):
+        #Return an error map only if any readgroup gave errors
+        error_map["read_group"] = err_list
b
diff -r 000000000000 -r 6a88b42ce6b9 tools/mira4/repository_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/mira4/repository_dependencies.xml Fri Nov 21 06:42:56 2014 -0500
b
@@ -0,0 +1,4 @@
+<?xml version="1.0"?>
+<!-- Declares the single Tool Shed repository needed alongside these tools:
+     mira_datatypes (owner peterjc), pinned to changeset ddd2e3362c5e. -->
+<repositories description="This requires the MIRA datatype definitions (e.g. the MIRA Assembly Format).">
+    <repository changeset_revision="ddd2e3362c5e" name="mira_datatypes" owner="peterjc" toolshed="https://toolshed.g2.bx.psu.edu" />
+</repositories>
b
diff -r 000000000000 -r 6a88b42ce6b9 tools/mira4/tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/mira4/tool_dependencies.xml Fri Nov 21 06:42:56 2014 -0500
b
@@ -0,0 +1,55 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <!-- samtools comes from an existing Tool Shed package repository. -->
+    <package name="samtools" version="0.1.19">
+        <repository changeset_revision="923adc89c666" name="package_samtools_0_1_19" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <!-- MIRA is installed here from precompiled binaries, chosen by platform. -->
+    <package name="MIRA" version="4.0">
+        <install version="1.0">
+            <actions_group>
+                <!-- Download the binaries for MIRA compatible with 64-bit OSX. -->
+                <actions architecture="x86_64" os="darwin">
+                    <action type="download_by_url">http://downloads.sourceforge.net/project/mira-assembler/MIRA/stable/mira_4.0.2_darwin13.1.0_x86_64_static.tar.bz2</action>
+                    <action type="move_directory_files">
+                        <source_directory>bin</source_directory>
+                        <destination_directory>$INSTALL_DIR</destination_directory>
+                    </action>
+                </actions>
+                <!-- Download the binaries for MIRA compatible with 64-bit Linux. -->
+                <actions architecture="x86_64" os="linux">
+                    <action type="download_by_url">http://downloads.sourceforge.net/project/mira-assembler/MIRA/stable/mira_4.0.2_linux-gnu_x86_64_static.tar.bz2</action>
+                    <action type="move_directory_files">
+                        <source_directory>bin</source_directory>
+                        <destination_directory>$INSTALL_DIR</destination_directory>
+                    </action>
+                </actions>
+                <!-- This actions tag is only processed if none of the above tags resulted in a successful installation. -->
+                <actions>
+                    <action type="shell_command">echo "ERROR: Automated installation on your operating system and CPU architecture combination is not yet supported."</action>
+                    <action type="shell_command">echo "Your machine details (the output from 'uname' and 'arch'):"</action>
+                    <action type="shell_command">uname</action>
+                    <action type="shell_command">arch</action>
+                    <action type="shell_command">echo "If pre-compiled MIRA binaries are now available for this, please report this"</action>
+                    <action type="shell_command">echo "via https://github.com/peterjc/pico_galaxy/issues - thank you!"</action>
+                    <action type="shell_command">false</action>
+                    <!-- The 'false' command will return an error, so Galaxy should treat this as a failed install -->
+                </actions>
+                <!-- The $PATH environment variable is only set if one of the above <actions> tags resulted in a successful installation. -->
+                <action type="set_environment">
+                    <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR</environment_variable>
+                </action>
+                <action type="set_environment">
+                    <environment_variable action="set_to" name="MIRA4">$INSTALL_DIR</environment_variable>
+                </action>
+            </actions_group>
+        </install>
+        <readme>
+Downloads MIRA v4.0.2 from Sourceforge, requesting Bastien's precompiled binaries
+for 64 bit (x86_64) Linux or Mac OS X. Other platforms where compilation from
+source would be required (e.g. 32 bit Linux) are not supported by this automated
+installation script.
+
+http://chevreux.org/projects_mira.html
+http://sourceforge.net/projects/mira-assembler/
+        </readme>
+    </package>
+</tool_dependency>