Next changeset 1:70248e6e3efc (2015-08-05) |
Commit message:
Uploaded v0.0.4, previously only on the TestToolShed |
added:
test-data/U13small_m.fastq test-data/U13small_m.mira4_de_novo.fasta test-data/ecoli.fastq test-data/ecoli.mira4_de_novo.fasta test-data/empty_file.dat test-data/header.mira test-data/tvc_bait.fasta test-data/tvc_contigs.fasta test-data/tvc_map_ref_strain.fasta test-data/tvc_map_same_strain.fasta test-data/tvc_mini.fastq test-data/tvc_mini_bait_neg.fastq test-data/tvc_mini_bait_pos.fastq test-data/tvc_mini_bait_strict.fastq tools/mira4/README.rst tools/mira4/mira4.py tools/mira4/mira4_bait.py tools/mira4/mira4_bait.xml tools/mira4/mira4_convert.py tools/mira4/mira4_convert.xml tools/mira4/mira4_de_novo.xml tools/mira4/mira4_make_bam.py tools/mira4/mira4_mapping.xml tools/mira4/mira4_validator.py tools/mira4/repository_dependencies.xml tools/mira4/tool_dependencies.xml |
b |
diff -r 000000000000 -r 6a88b42ce6b9 test-data/U13small_m.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/U13small_m.fastq Fri Nov 21 06:42:56 2014 -0500 |
b |
b"@@ -0,0 +1,104 @@\n+@122533a.t3\n+ACGCGTTATTAAAGACTTTTTTTTTGGTTTTTTTCAAGTTCAAGTATTCTTTATTCAAAGTTGAAACATGTACCATACTGCATTATTGCAAAAATTCACTGGTACAAAACACTTTGCAGCTGGTGAGAAGGCAATAAAAAGTTGATTTTTAAACTCATTACTATAAATTATTCTTACAGTACTTTGCAAATTCAGAATTTCAAACTGCATGTTCTTTTTCTAAATTGCCCACAGTACTCGAGGTTCCTGAAGCTAAGGCAGCTGTTTCATAGGAGGGGGAAGAAGTATCAAATCTCTTGGGATTTCCATTTCTCTTTCCATGCCGACATACTTCAGGGCATCTTCCTGACTGTCTCTTTTTTCGGGAAACTTCTCTTCGCCAATCTGGATGGCTCTCTCGGCAAAAATACCTATCCTGTGATCACCGTTAACCATCATAACTTTTGCATACCACTTTGGATTTACCACCGATATGCCACAACGATTTTTGTTACCCTTGCAGGTTGCACCACCCAAAATCGTTGTTCTTTTGCCCTTGCCAAATTCTGGGCCCCAAGGGCCAATTCCCCAAAATTAATCCTTTTAATTTCGTGGGCCGCGTTCCAGCTGTCCCCCTTGTTGAAGGTTTTTTTTCGCCCCCCGGCTCTCCCCCCCCCTTTTTAAACCCCGTAAACCATTATAGTTGGTTAAACCCCCGGGGGGTTCCCCCTTGTAAATTTTACCCCCCCCTCTCCCCCATTTAAAAATATGTGCCCTTTTTCTCCCCCCCACTGGGCCCGGGGTTTTCTCCTATTCCGGGGAAAGAAAAACCCTGTTTTCTCTTCGCCAAAATACTCCCCCCCCCCTTTTTATAAAAAAAAAAAAAACTTGGGGGCCCCCACCCCCCCCCGGGGGGGAAAAAAAAAAGCGCGGGGTTTTTGCGCGTTTTTTTTGGGGGGGGGGGGGTTTCTTTTCCCCCCCTTTTTTCCTTCCCCCCCAAAGGAGAAAAAATCTTTTTTGTTGGGGCGCCTCCCGGCGCCCCTCCCGGGGTTGGCGCGGCGAAAACGCGGTTTTTCCCCCCCCCCCAAAGGGGGGGGCGCAGATACCGCGTTTACTCCCCATAAATACTCCGGGGGGGAATAACATCCCGGGAAAAAAAAATTTTATATCTATATATGGGGCCTCCCAAAAAGGGGCATAAAACACGGAGAAACCGGCGCCCTTGTGTGGGCGTTTTTCTCTCAATGGGCGCCCCCCCCCTCAACAAAAATAAAAA\n++\nn+@122533a.t7\n+GTTCGAGCAATATCCCTACGATCAACTAGAACAATGATTTTGTGGTGGATGCAACCCGCAAGGGTAACAAAATTCGTTTTGCAAATCATTCGGTAAATCCAAACTGCTATGCAAAAGTTATGATGGTTAACGGTGATCACAGGATAGGTATTTTTGCCAAGAGAGCCATCCAGACTGGCGAAGAGCTGTTTTTTGATTACAGATACAGCCAGGCTGATGCCCTGAAGTATGTCGGCATCGAAAGAGAAATGGAAATCCCTTGACATCTGCTACCTCCTCCCCCTCCTCTGAAACAGCTGCCTTAGCTTCAGGAACCTCGAGTACTGTGGGCAATTTAAAAAAAGAACATGCAGTTTGAAATTCTGAATTTGCAAAGTACTGTAAGAATAATTTATAGTAATGAGTTAAAAATCAACTTTTATTGCCTTCCACCACTGCAAGTGTTTGTACCGTGAATTTTGCAATAATGCCATATGGTACATTTTCCACTTTGAATAAAGAATACTTGAGCTTGAAAAAAAAAAAAAAAAAAGATTCTTTATTTAAGCGGCCGCGAGCTTTTCCCTTTAATAGGGTTAATTTAACTXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\n++\n+&&&,1/?/,/(+)+'('+)(1.+*+,0**1-1574=====EBAABA=:=56;6879=>7-025<???@BA>>>;?ACBA?A?AAAEBEA?ABAA=FF@@??AA?@AA?A=@=A<<ACAAAD?==>@==@=@E=@@A@=A@;2:22-&623@DFBAABB=59;/888B?B?<72;;:@==84.)1,9AAEEFECB???;B?<>??;BCBAAAAAA?==>=@DA??;7.<8<524/37AA??><.,1.87====AEB@AAA===869BA@?=@=?AAAABBABBABAAAAAC==A??ABBBEEDAEEBA97:053064:A7;)2,,2.63:5==>=@:::A@;A=7A==A?AAA?A=9====??=D====B@@=:E=A@AAA?;4:6=64==?AABAB05**33/,116>E@==9=:=:?@=:A=466,-)*..--/1230627:3666;.,*(1,996E<6;5@=8=@8-(/)"..b"AAAACAATAGAAATTAGTTATCCTGTCACGTGGGTTAAATGCAGCTGTTAATAAAATAGGCAAAGTAGTTGTCAAGAGTTAATCAGATACACTTCTGCAAGGATGACTAGTTTTATAAAATAAAATAATGGCATCAAAATGAATACCAGTTGAGTAATGTATTACTTTAAAATAACTGTGGGATCTTAAGTCATTTTATAAACACACTAGACCCATCAGACTCAATTTTTTTTTTCAATTTCAAGTATTCTTTATTCCAAGTTGAAAAATATCCATACTGCATTATTGCAAAAATTCACTGGTACAAAACACTTTGCAGCCGGGTGAGAAGGCCATAAAAAATTGAGTGTTTAAACTCATTGCTATAAATTATTCTTATAGTTCTCTTGCCAATTTCGAAATTTCAAACGGGCCTTTTCTTTTTTCTAAATTGCCCACCGTTTCCCGAAGGTTCCTGAAACCAAACTGAACGGAAGAAGGGAAAGTGTGTGAATAACTC\n++\n+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!=9=;49.-,,66,2(*18=56--1A;CEC?4787<?A;;;A:=BBAA=@@992;A>@D?EAEB894+75??BD?AAAAA??@=B=@?A??B???ABBEAAAABA@@@?CCFBBBAC@A>???>;AA@ACA?;8817AA?=;8;58?DDBAAAC???>CC93.*9.8CAAABAEDA@>D????A@??5:94>?:@BAA=??A?CDA@CCAB@AADC??97176/6;1?ADABAAAAAADAAAAEAAAAAAAAAAAAD??@??@DDAAA??8===?AAAAAAAADAAABABBBCAADAADDFAD@?CADDD?=A<5;2686==AAAC@AA@DDFCAFD??;;:8AA=>@:=9:==>>=88:+363?=;?@@EDFAAAAA?AA?AA??AAA===C?;<?@??=844.97?A@?:9A97B;;=>>AD9::646+6>2>806669>@??;8@???@=<<;@;E@D@AABAD>=>>=@===AAAEFFAAAAAAA??59?=@?20)),.7<@DC>==9=>A@;6532748?@@;<5<==A;==8==61**0-12173>@<@=A@2935<EBAA??AA??B>81..2988:61*25:<B@CCAADDEAAA??B?AA61-+34/836ADAA@98/8:55-397BD:1+,.2,.**/+--0?<8-,2*
-24=;;5?>/5+,*24.,.+(+))*(-*(,))-)(-*(9,,,,(*.6@AA>-/-/((),+23;67>61)()(*()****,5:>A8;-,-*())))+))),,-,*+*,%15*43,+)))*(-3+,,65,*++,/-+((+,.36-4,+)+((0,())+&3*.)*++**)/4(5/(3..*)-/0453315702**-*14*-+/+(+.*((++\n+@U13e07f02.t2\n+ACATGCTGCAGTCGATCTAGAGGATCCCATGTCAAGGGATTTCCATTTCTCTTTCAATGCCAACATACTTCAGGGCATCAGCCTGGCTGTATCTGTAATCAAAAAACAGTTCTTCGCAAGTCTGGATGGCTCTCCTGACAAAAACACCTATCCTGTGATCACCATTAACCATCATAACTTTTGCATAGCAGTTTGGATTTACCGAATGATTTGCAAAACGAATTTTGTTACCCTTGCAGGTTGCATCCACCACAAAATCATTGTTCAAGTTGAACAGAAAGCTGCACATGTATTTATCACACACTTTCCCTCTTCCGTCAGCTTAGCTTCAGGAACCTCGAGTACTGTGGGCAATTTAAAAAAAGAAAATGCCGTTTGAAATTCTGAATTTGCAAAGTACTATAAGAATAATTTATAGCAATGAGTTTAAAACTCGACTTTTTATTGCCTTCTCACCAGCTGCAAAGTGTTTTGTACCAGTGAATTTTTGCAACAATGCAGTATGGTTTATTTTTCAACTTTGAATAAAGAATACTTGTAAATTGAAAAAAAAAATTGGAGTTCTGGATGGGTCTACGTGGTGTTTATAAAATAACCTAACGCATCGCCCCATCCAGCTTTTTTTCTGAGAGAGTACAGATACCAGCCACTCCCGCGGGGTTGTTTCGATCTTGCAATGCCCCGCCAACTTTTTAATTTTTAGCAGAAAACCCCAGGTTCTAGCTCCCATTGACCAGAAAAATTGTTTATCCTCGGAAATACAAACGCCCTGTTGGAACCACAACTCATGCCTCTCTGCGCCCCTCAAGTTCGTGTAAATGTGAAAACCAGCGCCCCGCCCCGAGTTTTAGAAACGCCCGCCCCGCCTTGTGGAAGCCTTAGGGGAAGTATAACGCCCAGCGACGTTCTGCTCTCCTGGACAGCGTCGGCGTATGGTATCTGACGGTCTAAACATACGTCACGTAACGGGAGCCCCCCTCTTCCGCATCTCTCCCCACTATTTGTGGCTAGCAGAAGAGAAATAATAAGCCGCAGCGTATCTGATGAGGAGCAGACGTTATCACGCTCTGCGGTGGCGGTCTTTAGAACACAAAATTTCGCTGTTGTCGCGCCCTTCTCCGACGGAAAAGAGAAAAAACCCTCTCTGTCTTTTCTATTAAAATAAGTAATTGGTGGGTTTTTTCTGCTGCCTTCTGACGACGACCGAAGAGAAACGCGGGTTTTGTATAACGCAGATATCACATCGCGCGCCCTCCGCGTCCG\n++\nn" |
b |
diff -r 000000000000 -r 6a88b42ce6b9 test-data/U13small_m.mira4_de_novo.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/U13small_m.mira4_de_novo.fasta Fri Nov 21 06:42:56 2014 -0500 |
b |
@@ -0,0 +1,47 @@ +>MIRA_rep_c1 +aaatttcataaaatttctcatttcaaaaacgagaaaattaaggccttgcggggttaaatc +gttatcaagtaatcagtcggctcactgcccagtgtggacactggccaatattatggcacc +agcctttgagaaaaggaaacctttttttgaatccatctccctgacctggggtttgggttg +ggttttatcagcatagggtgacaggcaagttcaatctgattggatcctggatcccatcat +ggggtatctgttcctaaatcattccctgatcctcggtcagagcgagcgcttaggttcccg +ctgcagttaaacacctggttcatctgggcatactaaggttatgactttccacctgggagt +ccacgacaactgaaaaaactcaaaacttctttaagtagaagttgagcctgaagctgaggc +agctgtttcagagaggaggaggaggaggagatagcaaatgtcaagggatttccatttctc +tttcaatgccaacatacttcagggcatcagcctggctgtatctgtaatcaaaaaacagtt +cttcgcaagtctggatggctctcctgacaaaaacacctatcctgtgatcaccattaacca +tcataacttttgcatagcagtttggatttaccgaatgatttgcaaaacgaattttgttac +ccttgcaggttgcatccaccacaaaatcattgttcaagttgaacagaaagctgcacatgt +atttatcacacactttccctcttccgtcagcttagcttcaggaacctcgagtactgtggg +caatttagaaaaagaaaatgccgtttgaaattctgaatttgcaaagtactataagaataa +tttatagcaatgagtttaaaactcaactttttattgccttctcaccagctgcaaagtgtt +ttgtaccagtgaatttttgcaataatgcagtatggtatatttttcaactttgaataaaga +atacttgaaattgaaaaaaaaaattgagtctgatgggtctagtgtgtttataaaatgact +taagatcccacagttattttaaagtaatacattactcaactggtattcattttgatgcca +ttattttattttataaaactagtcatccttgcagaagtgtatctgattaactcttgacaa +ctactttgcctattttattaacagctgcatttaacccacgtgacaggataactaatttct +attgtttttgtatttttgcctcctccttccaagatagcagtagagtactgtgttaaatct +tgcttcagaagagcttgtttaaaatagttataaagaaagtaaaatacatagaaaaacacg +ttataattttaaattaaaaaagaaaaaagatgtctactctgaataaattatgcatacata +taaggtctgaacaaaaacatgaaaagatgggctctgagtatcaattttaaaaatctttac +cagtataacattactcaaacaaaaattaaaatttaatataattaagtacatattccaaga +caacaaatggaccttgactgttaattctgg +>MIRA_rep_c2 +gcaatatcgtgacgatatgtawgataaatacatgtgcagctytctgttcaacttgaacaa +tgattttgtggtggatgcaacccgcaagggtaacaaaattcgttttgcaaatcattcggt +aaatccaaactgctatgcaaaagttatgatggttaacggtgatcacaggatakgtatttt +tgccaagagagccatccagactggcgaaragctgttttttgattacagatacagccaggc +tgatgccctgaagtatgtcggcatcgaaagagaaatggaaatcccttgacatctgctacc +tcctccccctcctctgaaacagctgccttagcttcaggaacctcgagtactgtgggcaat 
+ttaaaaaaagaacatgcagtttgaaattctgaatttgcaaagtactgtaagaataattta +tagtaatgagtttaaaaatcaactttttattgccttctcaccagctgcaaagtgttttgt +wccagtgaattttgcaataatgcagtatggtacattttcactttgaataaagaatacttg +a +>MIRA_rep_c3 +gcgttattaaagacttttttttwtttttttttcaagttcaagtattctttattcaaagtt +gaaaaatgtaccatactgcattattgcaaaaattcactggtacaaaacactttgcagctg +gtgagaaggcaataaaaagttgatttttaaactcattactataaattattcttacagtac +tttgcaaattcagaatttcaaactgcatgttctttttctaaattgcccacagtactcgag +gttcctgaagctaaggcagctgtttcagaggagggggargaagtaycaaatytcwaggga +tttccatttctctttccatgccgamatacttcagggcatcagcctgactgtmtctgtaat +caaaaaacakctcttcgccavtctggatggctctctyggcaaaaatacctatcctgtgat +caccgttaaccatcataacttttgcatagcattt |
b |
diff -r 000000000000 -r 6a88b42ce6b9 test-data/ecoli.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ecoli.fastq Fri Nov 21 06:42:56 2014 -0500 |
b |
b"@@ -0,0 +1,20164 @@\n+@frag_1\n+AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTC\n++\n+##%')+.024JMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_1_a\n+GAGACATATTGCCCGTTGCAGTCAGAATGAAAAGCT\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMJ420.+)'%##\n+@frag_2\n+AGAGACATATTGCCCGTTGCAGTCAGAATGAAAAGC\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMJ420.+)'%#\n+@frag_3\n+CTTTTCATTCTGACTGCAACGGGCAATATGTCTCTG\n++\n+%')+.024JMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_4\n+ACAGAGACATATTGCCCGTTGCAGTCAGAATGAAAA\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMJ420.+)'\n+@frag_5\n+TTTCATTCTGACTGCAACGGGCAATATGTCTCTGTG\n++\n+)+.024JMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_6\n+ACACAGAGACATATTGCCCGTTGCAGTCAGAATGAA\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMJ420.+\n+@frag_7\n+TCATTCTGACTGCAACGGGCAATATGTCTCTGTGTG\n++\n+.024JMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_8\n+CCACACAGAGACATATTGCCCGTTGCAGTCAGAATG\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMJ420\n+@frag_9\n+ATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGA\n++\n+24JMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_10\n+ATCCACACAGAGACATATTGCCCGTTGCAGTCAGAA\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMJ4\n+@frag_11\n+TCTGACTGCAACGGGCAATATGTCTCTGTGTGGATT\n++\n+JMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_12\n+TAATCCACACAGAGACATATTGCCCGTTGCAGTCAG\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_13\n+TGACTGCAACGGGCAATATGTCTCTGTGTGGATTAA\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_14\n+TTTAATCCACACAGAGACATATTGCCCGTTGCAGTC\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_15\n+ACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAA\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_16\n+TTTTTAATCCACACAGAGACATATTGCCCGTTGCAG\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_17\n+TGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAA\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_18\n+TTTTTTTAATCCACACAGAGACATATTGCCCGTTGC\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_19\n+CAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAG\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_20\n+TCTTTTTTTAATCCACACAGAGACATATTGCCCGTT\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_21\n+ACGGGCAATATGTCTCTGTG
TGGATTAAAAAAAGAG\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_22\n+ACTCTTTTTTTAATCCACACAGAGACATATTGCCCG\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_23\n+GGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTG\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_24\n+ACACTCTTTTTTTAATCCACACAGAGACATATTGCC\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_25\n+GCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTC\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_26\n+AGACACTCTTTTTTTAATCCACACAGAGACATATTG\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_27\n+AATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTG\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_28\n+TCAGACACTCTTTTTTTAATCCACACAGAGACATAT\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_29\n+TATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGAT\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_30\n+TATCAGACACTCTTTTTTTAATCCACACAGAGACAT\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_31\n+TGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAG\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_32\n+GCTATCAGACACTCTTTTTTTAATCCACACAGAGAC\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_33\n+TCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCA\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_34\n+CTGCTATCAGACACTCTTTTTTTAATCCACACAGAG\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_35\n+TCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_36\n+AGCTGCTATCAGACACTCTTTTTTTAATCCACACAG\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_37\n+TGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGCTT\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_38\n+GAAGCTGCTATCAGACACTCTTTTTTTAATCCACAC\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_39\n+TGTGGATTAAAAAAAGAGTGTCTGATAGCAGCTTCT\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_40\n+CAGAAGCTGCTATCAGACACTCTTTTTTTAATCCAC\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_41\n+TGGATTAAAAAAAGAGTGTCTGATAGCAGCTTCTGA\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_42\n+TTCAGAAGCTGCTATCAGACACTCTTTTTTTAATCC\n++\n+MMMMMMMMMMMMMMMMMMMMMMMM
MMMMMMMMMMMM\n+@frag_43\n+GATTAAAAAAAGAGTGTCTGATAGCAGCTTCTGAAC\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_44\n+AGTTCAGAAGCTGCTATCAGACACTCTTTTTTTAAT\n++\n+MMMMMMMMMMMMMMMMMMM"..b"4997\n+AATTGATGATGAATCATCAGTAAAATCTATTCATTA\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_4998\n+ATAATGAATAGATTTTACTGATGATTCATCATCAAT\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_4999\n+TTGATGATGAATCATCAGTAAAATCTATTCATTATC\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_5000\n+AGATAATGAATAGATTTTACTGATGATTCATCATCA\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_5001\n+GATGATGAATCATCAGTAAAATCTATTCATTATCTC\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMK\n+@frag_5002\n+TGAGATAATGAATAGATTTTACTGATGATTCATCAT\n++\n+KKMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_5003\n+TGATGAATCATCAGTAAAATCTATTCATTATCTCAA\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMKKK\n+@frag_5004\n+ATTGAGATAATGAATAGATTTTACTGATGATTCATC\n++\n+KKKKMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_5005\n+ATGAATCATCAGTAAAATCTATTCATTATCTCAATA\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMKKKK#\n+@frag_5006\n+CTATTGAGATAATGAATAGATTTTACTGATGATTCA\n++\n+##KKKKMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_5007\n+GAATCATCAGTAAAATCTATTCATTATCTCAATAGC\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMMMKKKK##%\n+@frag_5008\n+AGCTATTGAGATAATGAATAGATTTTACTGATGATT\n++\n+'%##KKKKMMMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_5009\n+ATCATCAGTAAAATCTATTCATTATCTCAATAGCTT\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMMMKKKK##%')\n+@frag_5010\n+AAAGCTATTGAGATAATGAATAGATTTTACTGATGA\n++\n++)'%##KKKKMMMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_5011\n+CATCAGTAAAATCTATTCATTATCTCAATAGCTTTT\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMMKKKK##%')+.\n+@frag_5012\n+GAAAAGCTATTGAGATAATGAATAGATTTTACTGAT\n++\n+0.+)'%##KKKKMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_5013\n+TCAGTAAAATCTATTCATTATCTCAATAGCTTTTCA\n++\n+MMMMMMMMMMMMMMMMMMMMMMMKKKK##%')+.02\n+@frag_5014\n+ATGAAAAGCTATTGAGATAATGAATAGATTTTACTG\n++\n+420.+)'%##KKKKMMMMMMMMMMMMMMMMMMMMMM\n+@frag_5015\n+AGTAAAATCTATTCATTATCTCAATAGCTTTTCATT\n++\n+MMMMMMMMMMMMMMMMMMMMMKKKK##%')+.024J\n+@frag_5016\n+GA
ATGAAAAGCTATTGAGATAATGAATAGATTTTAC\n++\n+MJ420.+)'%##KKKKMMMMMMMMMMMMMMMMMMMM\n+@frag_5017\n+TAAAATCTATTCATTATCTCAATAGCTTTTCATTCT\n++\n+MMMMMMMMMMMMMMMMMMMKKKK##%')+.024JMM\n+@frag_5018\n+CAGAATGAAAAGCTATTGAGATAATGAATAGATTTT\n++\n+MMMJ420.+)'%##KKKKMMMMMMMMMMMMMMMMMM\n+@frag_5019\n+AAATCTATTCATTATCTCAATAGCTTTTCATTCTGA\n++\n+MMMMMMMMMMMMMMMMMKKKK##%')+.024JMMMM\n+@frag_5020\n+GTCAGAATGAAAAGCTATTGAGATAATGAATAGATT\n++\n+MMMMMJ420.+)'%##KKKKMMMMMMMMMMMMMMMM\n+@frag_5021\n+ATCTATTCATTATCTCAATAGCTTTTCATTCTGACT\n++\n+MMMMMMMMMMMMMMMKKKK##%')+.024JMMMMMM\n+@frag_5022\n+CAGTCAGAATGAAAAGCTATTGAGATAATGAATAGA\n++\n+MMMMMMMJ420.+)'%##KKKKMMMMMMMMMMMMMM\n+@frag_5023\n+CTATTCATTATCTCAATAGCTTTTCATTCTGACTGC\n++\n+MMMMMMMMMMMMMKKKK##%')+.024JMMMMMMMM\n+@frag_5024\n+TGCAGTCAGAATGAAAAGCTATTGAGATAATGAATA\n++\n+MMMMMMMMMJ420.+)'%##KKKKMMMMMMMMMMMM\n+@frag_5025\n+ATTCATTATCTCAATAGCTTTTCATTCTGACTGCAA\n++\n+MMMMMMMMMMMKKKK##%')+.024JMMMMMMMMMM\n+@frag_5026\n+GTTGCAGTCAGAATGAAAAGCTATTGAGATAATGAA\n++\n+MMMMMMMMMMMJ420.+)'%##KKKKMMMMMMMMMM\n+@frag_5027\n+TCATTATCTCAATAGCTTTTCATTCTGACTGCAACG\n++\n+MMMMMMMMMKKKK##%')+.024JMMMMMMMMMMMM\n+@frag_5028\n+CCGTTGCAGTCAGAATGAAAAGCTATTGAGATAATG\n++\n+MMMMMMMMMMMMMJ420.+)'%##KKKKMMMMMMMM\n+@frag_5029\n+ATTATCTCAATAGCTTTTCATTCTGACTGCAACGGG\n++\n+MMMMMMMKKKK##%')+.024JMMMMMMMMMMMMMM\n+@frag_5030\n+GCCCGTTGCAGTCAGAATGAAAAGCTATTGAGATAA\n++\n+MMMMMMMMMMMMMMMJ420.+)'%##KKKKMMMMMM\n+@frag_5031\n+TATCTCAATAGCTTTTCATTCTGACTGCAACGGGCA\n++\n+MMMMMKKKK##%')+.024JMMMMMMMMMMMMMMMM\n+@frag_5032\n+TTGCCCGTTGCAGTCAGAATGAAAAGCTATTGAGAT\n++\n+MMMMMMMMMMMMMMMMMJ420.+)'%##KKKKMMMM\n+@frag_5033\n+TCTCAATAGCTTTTCATTCTGACTGCAACGGGCAAT\n++\n+MMMKKKK##%')+.024JMMMMMMMMMMMMMMMMMM\n+@frag_5034\n+TATTGCCCGTTGCAGTCAGAATGAAAAGCTATTGAG\n++\n+MMMMMMMMMMMMMMMMMMMJ420.+)'%##KKKKMM\n+@frag_5035\n+TCAATAGCTTTTCATTCTGACTGCAACGGGCAATAT\n++\n+MKKKK##%')+.024JMMMMMMMMMMMMMMMMMMMM\n+@frag_5036\n+CATATTGCCCGTTGCAGTCAGAATGAAAAGCTATTG\n++\n+MMMMMMMMMMMMMMMMMMMMMJ420.+)'%##KKKK\n+@frag_5037\n+AATAGCT
TTTCATTCTGACTGCAACGGGCAATATGT\n++\n+KKK##%')+.024JMMMMMMMMMMMMMMMMMMMMMM\n+@frag_5038\n+GACATATTGCCCGTTGCAGTCAGAATGAAAAGCTAT\n++\n+MMMMMMMMMMMMMMMMMMMMMMMJ420.+)'%##KK\n+@frag_5039\n+TAGCTTTTCATTCTGACTGCAACGGGCAATATGTCT\n++\n+K##%')+.024JMMMMMMMMMMMMMMMMMMMMMMMM\n+@frag_5039_a\n+AGACATATTGCCCGTTGCAGTCAGAATGAAAAGCTA\n++\n+MMMMMMMMMMMMMMMMMMMMMMMMJ420.+)'%##K\n" |
b |
diff -r 000000000000 -r 6a88b42ce6b9 test-data/ecoli.mira4_de_novo.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ecoli.mira4_de_novo.fasta Fri Nov 21 06:42:56 2014 -0500 |
b |
@@ -0,0 +1,86 @@ +>MIRA_c1 +gccagggctattttaccggcgcagtatcgccgccaggattgcattgcgcacgggcgacat +ctggcaggcttcattcacgcctgctattcccgtcagcctgagcttgccgcgaagctgatg +aaagatgttatcgctgaaccctaccgtgaacggttactgccaggcttccggcaggcgcgg +caggcggtcgcggaaatcggcgcggtagcgagcggtatctccggctccggcccgaccttg +ttcgctctgtgtgacaagccggaaaccgcccagcgcgttgccgactggttgggtaagaac +tacctgcaaaatcaggaaggttttgttcatatttgccggctggatacggcgggcgcacga +gtactggaaaactaaatgaaactctacaatctgaaagatcacaacgagcaggtcagcttt +gcgcaagccgtaacccaggggttgggcaaaaatcaggggctgttttttccgcacgacctg +ccggaattcagcctgactgaaattgatgagatgctgaagctggattttgtcacccgcagt +gcgaagatcctctcggcgtttattggtgatgaaatcccacaggaaatcctggaagagcgc +gtgcgcgcggcgtttgccttcccggctccggtcgccaatgttgaaagcgatgtcggttgt +ctggaattgttccacgggccaacgctggcatttaaagatttcggcggtcgctttatggca +caaatgctgacccatattgcgggtgataagccagtgaccattctgaccgcgacctccggt +gataccggagcggcagtggctcatgctttctacggtttaccgaatgtgaaagtggttatc +ctctatccacgaggcaaaatcagtccactgcaagaaaaactgttctgtacattgggcggc +aatatcgaaactgttgccatcgacggcgatttcgatgcctgtcaggcgctggtgaagcag +gcgtttgatgatgaagaactgaaagtggcgctagggttaaactcggctaactcgattaac +atcagccgtttgctggcgcagatttgctactactttgaagctgttgcgcagctgccgcag +gagacgcgcaaccagctggttgtctcggtgccaagcggaaacttcggcgatttgacggcg +ggtctgctggcgaagtcactcggtctgccggtgaaacgttttattgctgcgaccaacgtg +aacgataccgtgccacgtttcctgcacgacggtcagtggtcacccaaagcgactcaggcg +acgttatccaacgcgatggacgtgagtcagccgaacaactggccgcgtgtggaagagttg +ttccgccgcaaaatctggcaactgaaagagctgggttatgcagccgtggatgatgaaacc +acgcaacagacaatgcgtgagttaaaagaactgggctacacttcggagccgcacgctgcc +gtagcttatcgtgcgctgcgtgatcagttgaatccaggcgaatatggcttgttcctcggc +accgcgcatccggcgaaatttaaagagagcgtggaagcgattctcggtgaaacgttggat +ctgccaaaagagctggcagaacgtgctgatttacccttgctttcacataatctgcccgcc +gattttgctgcgttgcgtaaattgatgatgaatcatcagtaaaatctattcattatctca +atagcttttcattctgactgcaacgggcaatatgtctctgtgtggattaaaaaaagagtg +tctgatagcagcttctgaactggttacctgccgtgagtaaattaaaattttattgactta +ggtcactaaatactttaaccaatataggcatagcgcacagacagataaaaattacagagt 
+acacaacatccatgaaacgcattagcaccaccattaccaccaccatcaccattaccacaa +cggtgcgggctgacgcgtacaggaaacacagaaaaaagcccgcacctgacagtgcgggct +ttttttttcgaccaaaggtaacgaggtaacaaccatgcgagagttgaagttcggcggtac +atcagtggcaaatgcagaacgttttctgcgtgttgccgatattctggaaagcaatgccag +gcaggggcaggtggccaccgtcctctctgcccccgccaaaacaccaaccacctggtggcg +atgattgaaaaaaccattagcggccaggatgctttacccaatatcagcgatgccgaacgt +atttttgccgaacttttgacgggactcgccgccgcccagccggggttcccgctggcgacg +tcaattgaaaactttcgtcgatcaggaatttgcccaaataaaacatgtcctgcatggcat +tagtttgttggggcagtgcccggatagcatcaacgctgcgctgatttgccgtggcgagaa +aatgtcgatcgccgttatggccggcgtattagaagcgcgcggtcacaacgttactgttat +cgatccggtcgaaaaactgctggcagtggggcattacctcgaatctaccgtcgatattgc +tgagtccacccgccgtattgcggcaagccgcattccggctgatcacatggtgctgatggc +aggtttcaccgccggtaatgaaaaaggcgaactggtggtgcttggacgcaacggttccga +ctactctgctgcggtgctggctgcctgtttacgcgccgattgttgcgagatttggacgga +cgttgacggggtctatacctgcgacccgcgtcaggtgcccgatgcgaggttgttgaagtc +gatgtcctaccaggaagcgatggagctttcctacttcggcgctaaagttcttcacccccg +caccattacccccatcgcccagttccagatcccttgcctgattaaaaataccggaaatcc +tcaagcaccaggtacgctcattggtgccagccgtgatgaagacgaattaccggtcaaggg +catttccaatctgaataacatggcaatgttcagcgtttctggtccggggatgaaagggat +ggtcggcatggcggcgcgcgtctttgcagcgatgtcacgcgcccgtatttccgtggtgct +gattacgcaatcatcttccgaatacagcatcagtttctgcgttccacaaagcgactgtgt +gcgagctgaacgggcaatgcaggaagagttctacctggaactgaaagaaggcttactgga +gccgctggcagtgacggaacggctggccattatctcggtggtaggtgatggtatgcgcac +cttgcgtgggatctcggcgaaattctttgccgcactggcccgcgccaatatcaacattgt +cgccattgctcagggatcttctgaacgctcaatctctgtcgtggtaaataacgatgatgc +gaccactggcgtgcgcgttactcatcagatgctgttcaataccgatcaggttatcgaagt +gtttgtgattggcgtcggtggcgttggcggtgcgctgctggagcaactgaagcgtcagca +aagctggctgaagaataaacatatcgacttacgtgtctgcggtgttgccaactcgaaggc +tctgctcaccaatgtacatggccttaatctggaaaactggcaggaagaactggcgcaagc +caaagagccgtttaatctcgggcgcttaattcgcctcgtgaaagaatatcatctgctgaa +cccggtcattgttgactgcacttccagccaggcagtggcggatcaatatgccgacttcct +gcgcgaaggtttccacgttgtcacgccgaacaaaaaggccaacacctcgtcgatggatta 
+ctaccatcagttgcgttatgcggcggaaaaatcgcggcgtaaattcctctatgacaccaa +cgttggggctggattaccggttattgagaacctgcaaaatctgctcaatgcaggtgatga +attgatgaagttctccggcattctttctggttcgctttcttatatcttcggcaagttaga +cgaaggcatgagtttctccgaggcgaccacgctggcgcgggaaatgggttataccgaacc +ggacccgcgagatgatctttctggtatggatgtggcgcgtaaactattgattctcgctcg +tgaaacgggacgtgaactggagctggcggatattgaaattgaacctgtgctgcccgcaga +gtttaacgccgagggtgatgttgccgcttttatggcgaatctgtcacaactcgacgatct +ctttgccgcgcgcgtggcgaaggcccgtgatgaaggaaaagttttgcgctatgttggcaa +tattgatgaagatggcgtctgccgcgtgaagattgccgaagtggatggtaatgatccgct +gttcaaagtgaaaaatggcgaaaacgccctggccttctatagccactattatcagccgct +gccgttggtactgcgcggatatggtgcgggcaatgacgttacagctgccggtgtctttgc +tgatctgctacgtaccctctcatggaagttaggagtctgacatggttaaagtttatgccc +cggcttccagtgccaatatgagcgtcgggtttgatgtgctcggggcggcggtgacacctg +ttgatggtgcattgctcggagatgtagtcacggttgaggcggcagagacattcagtctca +acaacctcggacgctttgccgataagctgccgtcagaaccacgggaaaatatcgtttatc +agtgctgggagcgtttttgccaggaactgggtaagcaaattccagtggcgatgaccctgg +aaaagaatatgccgatcggttcgggcttaggctccagtgcctgttcggtggtcgcggcgc +tgatggcgatgaatgaacactgcggcaagccgcttaatgacactcgtttgctggctttga +tgggcgagctggaaggccgtatctccggcagcattcattacgacaacgtggcaccgtgtt +ttctcggtggtatgcagttgatgatcgaagaaaacgacatcatcagccagcaagtgccag +ggtttgatgagtggctgtgggtgctggcgtatccggggattaaagtctcgacggcagaag +ccagggctattttaccggcgcagtatcgccgcca |
b |
diff -r 000000000000 -r 6a88b42ce6b9 test-data/header.mira --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/header.mira Fri Nov 21 06:42:56 2014 -0500 |
b |
@@ -0,0 +1,2 @@ +@Version 2 0 +@Program MIRALIB |
b |
diff -r 000000000000 -r 6a88b42ce6b9 test-data/tvc_bait.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tvc_bait.fasta Fri Nov 21 06:42:56 2014 -0500 |
b |
@@ -0,0 +1,2 @@ +>fragment (intended to match some but not all of tvs_mini.fastq) +TTAGCCtttcgagcggccgcccgggcaggtctgaaaaacaccgcaaaatgccggcgggtcacggtttggggtcgcgcaccagagatctatttgctcgggcattcaggaAGCCTT |
b |
diff -r 000000000000 -r 6a88b42ce6b9 test-data/tvc_contigs.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tvc_contigs.fasta Fri Nov 21 06:42:56 2014 -0500 |
b |
@@ -0,0 +1,20 @@ +>mira_c1 +ttagcgtggtcgcggccgaggtaccctctaccatgaaaccaggcttgggtccctctggct +gtctcttggtgctgataatcttaccttgtgccttggcctcagccttcaacttatcgttct +tcttgatcctctccatgatctcctcatggcaccttgatggctggacatgttccacacgaa +catgaatcctcttccttatgattctgtttccaacctgcttgttgacctcaacaccaacag +cacgcttggtgacgttccatacccgacccgtgcggccatgatagaacttgtggggcatac +ctttgtggatcgacccgttaaccttgacatcaacatagtcgccgactttgaagatacgaa +ggtaagttgtgagatgggtaggacccttcttcctgaatgcccgagcaaatagatctctgg +tgcgcgaccccaaaccgtgacccgccggcattttgcggtgtttttcagacctgcccgggc +ggccgctcgaaa +>mira_c2 +tttcgagcggycgcccggscgaggtaccctscaccatgaaaccaggcttgggtccctcwg +gctgyctcttggtgctgataatcttwccytgtgccttkgcctcagccttcaacttatcrt +tcttcttgatcctctccattatctcctcatggcamckagatggctggacatgttccacac +gaacatgaatcctcttccttatgattctgtttccaacctgyttgttgacctcaacaccaa +cagcgcgcttggtgacgttccatacccgacccgtgcggccatggtagaacttgtggggca +tacctttgtggatcgacccgttaaccttgacatcaacatagtcgccgactttgaagatac +gaaggtaagttgtgagatgggtaggacccttcttcctgaatgcccgagcaaatagatccc +tggtgcgtgacctcaaaccgtgacccgccggcattttgaggtgtttttcagacctgcccg +ggcggccgctcgaaa |
b |
diff -r 000000000000 -r 6a88b42ce6b9 test-data/tvc_map_ref_strain.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tvc_map_ref_strain.fasta Fri Nov 21 06:42:56 2014 -0500 |
b |
@@ -0,0 +1,20 @@ +>mira_c1_bb +ttagcgtggtcgcggccgaggtaccctctaccatgaaaccaggcttgggtccctctggct +gtctcttggtgctgataatcttaccttgtgccttggcctcagccttcaacttatcgttct +tcttgatcctctccatgatctcctcatggcaccttgatggctggacatgttccacacgaa +catgaatcctcttccttatgattctgtttccaacctgcttgttgacctcaacaccaacag +cacgcttggtgacgttccatacccgacccgtgcggccatgatagaacttgtggggcatac +ctttgtggatcgacccgttaaccttgacatcaacatagtcgccgactttgaagatacgaa +ggtaagttgtgagatgggtaggacccttcttcctgaatgcccgagcaaatagatctctgg +tgcgcgaccccaaaccgtgacccgccggcattttgcggtgtttttcagacctgcccgggc +ggccgctcgaaa +>mira_c2_bb +xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxaccatgaaaccaggcttgggtccctctg +gctgtctcttggtgctgataatcttaccttgtgccttggcctcagccttcaacttatcgt +tcttcttgatcctctccattatctcctcatggcamckagatggctggacatgttccacac +gaacatgaatcctcttccttatgatyctgttwccaacctgyttgttgacctcaacaccaa +cagcgcgcttggtgacgttccatacccgacccgtgcggccatggtagaacttgtggggca +tacctttgtggatcgacccgttaaccttgacatcaacatagtcgccgactttgaagatac +gaaggtaagttgtgagatgggtaggacccttcttcctgaatgcccgagcaaatagatcyc +tggtgcgtgacctcaaaccgtgacccgccggcattttgaggtgtttttcagacctgcccg +ggcggccgctcgaaa |
b |
diff -r 000000000000 -r 6a88b42ce6b9 test-data/tvc_map_same_strain.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tvc_map_same_strain.fasta Fri Nov 21 06:42:56 2014 -0500 |
b |
@@ -0,0 +1,20 @@ +>mira_c1_bb +ttagcgtggtcgcggccgaggtaccctctaccatgaaaccaggcttgggtccctctggct +gtctcttggtgctgataatcttaccttgtgccttggcctcagccttcaacttatcgttct +tcttgatcctctccatgatctcctcatggcaccttgatggctggacatgttccacacgaa +catgaatcctcttccttatgattctgtttccaacctgcttgttgacctcaacaccaacag +cacgcttggtgacgttccatacccgacccgtgcggccatgatagaacttgtggggcatac +ctttgtggatcgacccgttaaccttgacatcaacatagtcgccgactttgaagatacgaa +ggtaagttgtgagatgggtaggacccttcttcctgaatgcccgagcaaatagatctctgg +tgcgcgaccccaaaccgtgacccgccggcattttgcggtgtttttcagacctgcccgggc +ggccgctcgaaa +>mira_c2_bb +tttcgagcggncgcccggncgaggtaccctncaccatgaaaccaggcttgggtccctctg +gctgtctcttggtgctgataatcttaccttgtgccttggcctcagccttcaacttatcgt +tcttcttgatcctctccattatctcctcatggcamckagatggctggacatgttccacac +gaacatgaatcctcttccttatgattctgtttccaacctgyttgttgacctcaacaccaa +cagcgcgcttggtgacgttccatacccgacccgtgcggccatggtagaacttgtggggca +tacctttgtggatcgacccgttaaccttgacatcaacatagtcgccgactttgaagatac +gaaggtaagttgtgagatgggtaggacccttcttcctgaatgcccgagcaaatagatccc +tggtgcgtgacctcaaaccgtgacccgccggcattttgaggtgtttttcagacctgcccg +ggcggccgctcgaaa |
b |
diff -r 000000000000 -r 6a88b42ce6b9 test-data/tvc_mini.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tvc_mini.fastq Fri Nov 21 06:42:56 2014 -0500 |
b |
b"@@ -0,0 +1,24 @@\n+@gnlti136477918\n+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTTCGAGCGGCCGCCCGGGCAGGTACCCTCCACCATGAAACCAGGCTTGGGTCCCTCAGGCTGCCTCTTGGTGCTGATAATCTTTCCCTGTGCCTTTGCCTCAGCCTTCAACTTATCATTCTTCTTGATCCTCTCCATTATCTCCTCATGGCAACGAGATGGCTGGACATGTTCCACACGAACATGAATCCTCTTCCTTATGATCCTGTTACCAACCTGTTTGTTGACCTCAACACCAACAGCGCGCTTGGTAACATTCCAGACCCGACCCGTGCGCCCATGGTAGAACTTGTGGGGCATACCTTTGTGGATCGACCCGTTAACCTTGACATCAACATAGTCGCCGACTTTGAAGATACGAAGGTAAGTTGTGAGATGGGTAGGACCCTTCTTCCTGAATGCCCGAGCAAATAGATCCCTGGTGCGTGAACTCAAACCGTGACCCGCCGGCATTTTGAGGTGTTTTTCAGCTGCCTTGTTCACXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\n++\n+38<>><><<96-++42:AABBCCCCCCCCDFFFIYYIIIIIINTTTTNNNNNNTTTTTTNNIIIIHHHHHHYIFFFIDIINIITTTOQDDDHHHNTYTFFFIIINNIITDDDDDDFLLTTTLLLYYYYYYFFIIIILKOOYYYNNNNNNOOKKKKILLLFFIOOTTNLLLLLNYYYYKYFDDDLLLNNNTTNNLMKKSYNNJIIGGGGLLIILOOYYYYYYYYYTNNNNNTYYYYYYYTOLLLLLLNTTYYTTTLLTYYKKKONNNNLLLLGGINIIIIIINNNNNNNNIHHHHHHHHHHINIITTTTNNNNNTYTNNNNIIIFFDHHHFFINNNNIHHHDDEIDDDNNDDKQQQQMMMQQYYNNIDCBBBBAHIGGGKYYYOOD?<AACCCCCHCCC@>>>>HBBAAAA>@999AOOOYIIICC<<,,,99HHHFKK??C>>B>>H?6/+))42856301:7<>HHEI4/))-10449--0..((*4))*35A<9+++44>BB754---@<;42*))45:7024.(')))')++049>>41-'(,'(.2393222/3171((((-.4011/0+).)''),..4133><B=451119411+))<44:686:/066888888=::884))*'''**,''*-.''*,/2(*144+')64>;1/,'')''1*30+0..****(*0-.4-)*),'(''+,-((*+))**+,''''''***''***-*)121,''''(+*,,+-((****.0..,0*))*(),))''))*+,*)()))''''+'')'')**)()'','')'(**((*((*(((*441.-*****())+*''')-++*****-*((((**))))))*)))++***)(**11.()****0*-,((*--.***,((,,,**'')'''')'-((--,''**441***)+'(''*,*(\n+@gnlti136478624\n+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTAGCGTGGTCGCGGCCGAGGTACCCTCTAC
CATGAAACCAGGCTTGGGTCCCTCTGGCTGTCTCTTGGTGCTGATAATCTTACCTTGTGCCTTGGCCTCAGCCTTCAACTTATCGTTCTTCTTGATCCTCTCCATGATCTCCTCATGGCACCTTGATGGCTGGACATGTTCCACACGAACATGAATCCTCTTCCTTATGATTCTGTTTCCAACCTGCTTGTTGACCTCAACACCAACAGCACGCTTGGTGACGTTCCATACCCGACCCGTGCGGCCATGATAGAACTTGTGGGGCATACCTTTGTGGATCGACCCGTTAACCTTGACATCAACATAGTCGGCGACTTTGAAGATACGAAGGTAAGTTGTGAGATGGGTAGGACCCTTCTTCCTGAATGGCCGAGCAAATAGATCTCTGGGTGCGGGAACTCAAACCGGGAACCGGCGGCATTTTGCGGTGTTTTTAGACCCTGCCGGGGGGGCGGTCGAAAGGCCGATTCTTGAGATTTTCCXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXGGGGGTAGGAGGTTGTAATTGGAAAAACCTGGGGTAGCAAGTTAATGGCTTGAGCAATTCCGTTCGGCGGGTGGGTATAGAGAAGGGGCGGGCGATCGGGATCCGAAGATGGGGAGCGGATGGGGAGGAGAGGCAGGTGGGGATATAGGGGGGGGGTGGGGTAGGGGAGGGGCGGTGCTGTAGGGGGAGGGGCGGCGTTGGTTTTCTGTGTTACGAGTTGGGTGACCCGAAGTAATTGGGG\n++\n++1449>>>;=::AADDCCCCFIICCBB>???BBDDDDYYHBCCBBFF@@777BBG@>7584;;@DHDDDDDDMNIIIIIYYYTOYKKKMIDDDDDHOKKKQSTTTNNIIYYFFFFFIDDDDDIYOOIIDAA>DADDFDLLDDDIKKKKOKKKKKKYYYOOJJJOYYYYTOOKKPMMMMMSSSSMMMSSYYYYLJIIIID=====FKKKKKKYYYOOKKIIIIISSFFDIHIIKSSTOOKKKLYYSSKMIIIOOIIIDDDDHDDDIOOIIFFFIIIIKKKMIIIIIIMKKKKIIIFDDDDADDIDDDDDDDHDDDDFFF99///<<HFFFFFFFFGOOYTDDDHHH99,,,95>>>>47//-</3-822.446777BBBFFIOC>6.++-53:?:>7744213...772007:9:-++33>>DH>>??933;;FQ<93/+10++/.//-10234:1//223;:/,,***++'')'+,/)))-.2.++((.0***,))*,0(())''))))+'')***''))***))),669+,*****..''')*,**,*))))*'',)))'(++,++((*+*)*.*))''')***''.*))'')''''''***+)))++**(''''')****)''')'(***''**+/.)))*)')((''***(('')'')-))''''.'')))**'+''''**))''))***+((***)%(((***(((((,.,,(((((*(((+.(()'''')*(())(***((**-+,,)')''*/,''''**'''))((''*+((''''))*))'')'')),.)())'''''('*)**+***-*(')''))((+++0***(('')'')**()++*+**(')).5+*'''')*,---'''')'''\n+@gnlti136478626\n+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTTCGAGCGGCCGCCCGGGCAGGTCTGAAAAACACCTCAAAATGCCGGCGGGTCACGGTTTGAGGTCACGCACCAGAGATCTATTTGCTCGGGCATTCAGGAAGAAGGGTCCTACCCATCTCACAACTTACCTTCGTATCTTCAAAGTCGGCGACTATGTTGATGTCAAGGTTAACGGGTCGATCCACAAAGGTATGCCCCACAAGTTCTACCATGGCCGCACGGGTCGGGTATGGAACGTCACCA
AGCGCGCTGTT"..b"TGGGTGTGGGGGTGGGTTTTGTTAGGCGAGGTGGTTTTTTTTTCTTTTTTTTTTTAGTGGAGGTGT\n++\n+04--46:<<B<<>@>HHEB<822<<IEHIHCCCCCCIIIITTIIIIIINNTTTTTYTIHHHHHHNNNIIDDDDFFNTKM>>?OQFFFMKOOTFDDDDHHIIIIOHFFFFFINTDDAAAADHHDDDOYNNHHFFDDDDDDFDC=AA=DIIIIFFNHHFFFFNNNNNNNNNNDD448DNTOOKKKOBB?DFGGGNOTOO555>>A>>>AAF:::>>@DB=====5AACOIIBCCBB<5005<41''+18EAAAHHHB>96-+,+14:AAIB??>>CD>>;87>5:30-14477<>@CDDD>>?==MQYI>H---88:77:<B>>=33000008<9::>BBBFHHCCC>IFDDDOOOQIQQII:2((+6<552228>DDDEH>>33399>31)''-.FFIMIIIO>>333;@II>71:37<AAEIAA778<B69,,,01BBIKFF>>>944,,,6:6/(((*44<<43,,,66AEH98,,,6/+**--..((*,1><::65/0*'))'(,-,)++*31+((*((**.,+*'')'()'''*++))''('*+26410''''+)(())''))*'(***++*))*((****(''''')++**)+'')*))*.-***))*)*-/****,-30.)''''''''***''''')-.*))'')''**++*))/,,((,+-***+)'''''')+'''.*)')'(0-+((+++)))'''(*+'''')'(**,***''''))*))'')))),''))''))*((***))/()(*''''++**((((((****(')))))*))'),))'')''.)))))))'')+,++')-))'(+))***))''))****++))))+1-**))**'(140''))**))'')+**(\n+@gnlti136479357\n+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTGTGGTCGCGGCCGAGGTACCCTGCACCATGAAACCAGGCTTGGGTCCCTCTGGCTGTCTCTTGGTGCTGATAATCTTACCTTGTGCCTTGGCCTCAGCCTTCAACTTATCGTTCTTCTTGATCCTCTCCATGATCTCCTCATGGCACCTTGATGGCTGGACATGTTCCACACGAACATGAATCCTCTTCCTTATGATTCTGTTTCCAACCTGCTTGTTGACCTCAACACCAACAGCGCGCTTGGTGACGTTCCATACCCGACCCGTGCGGCCATGGTAGAACTTGTGGGGCATACCTTTGTGGATCGACCCGTTAACCTTGACATCAACATAGTCGCCGACTTTGAAGATACGAAGGTAAGTTGTGAGATGGGTAGGACCCTTCTTCCTGAATGCCCGAGCAAATAGATCCCTGGTGCGTGGACTCCAAACGTGAACTTGCCGGGCGGGGGGGAGAGGGGGAGCGGGGGGGGGGGAGAATAAGGGGGGAGGGGAGGGGGAGAGAAAAGGGAGGAGGGGGGGGGTAGGGAGGGAGAGGGAGGGGGGAGGGGGGGGGAGGGGGGGGGGGGAGGGAAGGGGGGGGAGGGGGGGAGGGGGAGGGGAGAGGGGGAGGGGAGGGGGGGGGGAGGGGAGGGGAAGAGGGGGGGGGGGGGGGGGGGAGGGGGGGAGGGGGGGGGGGAGGGGGGGGGGGGGGAGGGAGAAGGAGAAA\n++\n+.4<BB;>>>>>>>>FDCCCCCCIINIIIDCCCCCCDDDDYQKKFNNNCAAAAAINNIIINTIIHHDDDDDDDDDDDKITTTTLYYYYYLFFIIIILOKKKIIIIKKOKYFDDDDDFIIIIIKKKLLLLTIDDDDDFFDDDDDNNIIIIIKKDDDDHHJOYYSSMMFFADDDDLYSSB>666>BDDDDKOOKJJOOJJED==99AOIJJOOYYYLJJJLLTTTTLYYYYYYYYYYLLIIBBADDNOIIIIIINDAAD
DDDKOOIIIIIFDDA>7==@@DII??887BBOOFDDDDDIYYNNNHDDKOO?BBHHINODDAF>A>AADFFIIOGFFFFIITOOIDDDDDDDDDDDHHD89,,,<>FFFDD>99<<<B<845;<BAAA;>99=EBIIIIIOOD@@><>AB<8::AA:>AABHIHHHCCC99--+46CCCIIIIAA551-4440++)))4499+))019<>>>1/()0/-('''129.,//+((**+++8@@,*)11))*+***+++))%(,.*)))..,.2+**+8..)))),*))'')))''.''+)*++)+)))''))'(++++*))'''''))****))''))/.03:=741.''**),''''**))))))4**)')'').11.('*))'%)*-.2))*.0('''))(')''))****))('+'')''))****,((((**))1..''))***)1-1-.''),,''))%(.**)(')))*)-().-.***))1)''+''))****))''\n+@gnlti136479522\n+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTTCGAGCGGCCGCCCGGGCAGGTCTGAAAAACACCGCAAAATGCCGGCGGGTCACGGTTTGGGGTCGCGCACCAGAGATCTATTTGCTCGGGCATTCAGGAAGAAGGGTCCTACCCATCTCACAACTTACCTTCGTATCTTCAAAGTCGGCGACTATGTTGATGTCAAGGTTAACGGGTCGATCCACAAAGGTATGCCCCACAAGTTCTATCATGGCCGCACGGGTCGGGTATGGAACGTCACCAAGCGTGCTGTTGGGTGTNGAGGTCAAAAGCAAGTTGGAAACAGAATCATAAGGAAGAGGATTCATGTCCGTGTGGAACATGTCCAGCCATCAAGGTGCCATGAGGAGATCATGGAGAGGATCAAGAAGAACGATTAAGTGAAGGCTGAGGGCAAGGCACAAGGTATGATTATCAGGACCAAGAGACAGGCAGAGGGGACCAAGGCCTGGTTTCATGGGTXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\n++\n+(/..2>>H@CACCHICCCCCCIIYTTTFA>>>ADIIIIOTNNNNNIKINIIHDDDIIIHOMMFNNHDDDDDFINIIKOKKKFFIIIITYTLYIIIIIINTTTIFFFFFKLLYYYYYNNNNHHDDDGGNYYYYYYFDGDDDTHIIIIIIIKFFFIIITYINIIIITTTTYYYYTTNNNNNNNIIIIIIIIIFFIOOOOOIFFFFIIINOFKKNND84**+::FFFDHDDDDDIDD>44***49IIIFCIA?94233AIIIMQOOBFF:4-***66CCCCD>>444>?B44*((***45C>@BHIAAAA94%!%44=1-''''))''+(+/,((*245411.40)((+4::79..***-+/.()14BEEIIBCFIIHD88,,,NBID>>A>BB?AAAA>H:::;::4-+,,4/;46,**4841))/1.''*)))+444+++520'')11)(*.+,0**0((*159501224594406652//-/-2,/*1*')+.()./1.01::>>>>5511.4***1:5*((,-/-((******+*-'')((/20-)-,-*++.1/.(())''),351''))'(..280.'')+()**,398..''))**((+1.(())''))**-.--,,**''*)((-)***(()),1,/.1,))))+,+*+*++,-,'''')**((+*++,))))''))**1.,,***+****,+**+++4***.*))'')'')))''))
*.5811.--+,+*+*))+,,-..+\n" |
b |
diff -r 000000000000 -r 6a88b42ce6b9 test-data/tvc_mini_bait_neg.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tvc_mini_bait_neg.fastq Fri Nov 21 06:42:56 2014 -0500 |
b |
@@ -0,0 +1,16 @@ +@gnlti136477918 +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTTCGAGCGGCCGCCCGGGCAGGTACCCTCCACCATGAAACCAGGCTTGGGTCCCTCAGGCTGCCTCTTGGTGCTGATAATCTTTCCCTGTGCCTTTGCCTCAGCCTTCAACTTATCATTCTTCTTGATCCTCTCCATTATCTCCTCATGGCAACGAGATGGCTGGACATGTTCCACACGAACATGAATCCTCTTCCTTATGATCCTGTTACCAACCTGTTTGTTGACCTCAACACCAACAGCGCGCTTGGTAACATTCCAGACCCGACCCGTGCGCCCATGGTAGAACTTGTGGGGCATACCTTTGTGGATCGACCCGTTAACCTTGACATCAACATAGTCGCCGACTTTGAAGATACGAAGGTAAGTTGTGAGATGGGTAGGACCCTTCTTCCTGAATGCCCGAGCAAATAGATCCCTGGTGCGTGAACTCAAACCGTGACCCGCCGGCATTTTGAGGTGTTTTTCAGCTGCCTTGTTCACXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX ++ +38<>><><<96-++42:AABBCCCCCCCCDFFFIYYIIIIIINTTTTNNNNNNTTTTTTNNIIIIHHHHHHYIFFFIDIINIITTTOQDDDHHHNTYTFFFIIINNIITDDDDDDFLLTTTLLLYYYYYYFFIIIILKOOYYYNNNNNNOOKKKKILLLFFIOOTTNLLLLLNYYYYKYFDDDLLLNNNTTNNLMKKSYNNJIIGGGGLLIILOOYYYYYYYYYTNNNNNTYYYYYYYTOLLLLLLNTTYYTTTLLTYYKKKONNNNLLLLGGINIIIIIINNNNNNNNIHHHHHHHHHHINIITTTTNNNNNTYTNNNNIIIFFDHHHFFINNNNIHHHDDEIDDDNNDDKQQQQMMMQQYYNNIDCBBBBAHIGGGKYYYOOD?<AACCCCCHCCC@>>>>HBBAAAA>@999AOOOYIIICC<<,,,99HHHFKK??C>>B>>H?6/+))42856301:7<>HHEI4/))-10449--0..((*4))*35A<9+++44>BB754---@<;42*))45:7024.(')))')++049>>41-'(,'(.2393222/3171((((-.4011/0+).)''),..4133><B=451119411+))<44:686:/066888888=::884))*'''**,''*-.''*,/2(*144+')64>;1/,'')''1*30+0..****(*0-.4-)*),'(''+,-((*+))**+,''''''***''***-*)121,''''(+*,,+-((****.0..,0*))*(),))''))*+,*)()))''''+'')'')**)()'','')'(**((*((*(((*441.-*****())+*''')-++*****-*((((**))))))*)))++***)(**11.()****0*-,((*--.***,((,,,**'')'''')'-((--,''**441***)+'(''*,*( +@gnlti136478624 
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTAGCGTGGTCGCGGCCGAGGTACCCTCTACCATGAAACCAGGCTTGGGTCCCTCTGGCTGTCTCTTGGTGCTGATAATCTTACCTTGTGCCTTGGCCTCAGCCTTCAACTTATCGTTCTTCTTGATCCTCTCCATGATCTCCTCATGGCACCTTGATGGCTGGACATGTTCCACACGAACATGAATCCTCTTCCTTATGATTCTGTTTCCAACCTGCTTGTTGACCTCAACACCAACAGCACGCTTGGTGACGTTCCATACCCGACCCGTGCGGCCATGATAGAACTTGTGGGGCATACCTTTGTGGATCGACCCGTTAACCTTGACATCAACATAGTCGGCGACTTTGAAGATACGAAGGTAAGTTGTGAGATGGGTAGGACCCTTCTTCCTGAATGGCCGAGCAAATAGATCTCTGGGTGCGGGAACTCAAACCGGGAACCGGCGGCATTTTGCGGTGTTTTTAGACCCTGCCGGGGGGGCGGTCGAAAGGCCGATTCTTGAGATTTTCCXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXGGGGGTAGGAGGTTGTAATTGGAAAAACCTGGGGTAGCAAGTTAATGGCTTGAGCAATTCCGTTCGGCGGGTGGGTATAGAGAAGGGGCGGGCGATCGGGATCCGAAGATGGGGAGCGGATGGGGAGGAGAGGCAGGTGGGGATATAGGGGGGGGGTGGGGTAGGGGAGGGGCGGTGCTGTAGGGGGAGGGGCGGCGTTGGTTTTCTGTGTTACGAGTTGGGTGACCCGAAGTAATTGGGG ++ ++1449>>>;=::AADDCCCCFIICCBB>???BBDDDDYYHBCCBBFF@@777BBG@>7584;;@DHDDDDDDMNIIIIIYYYTOYKKKMIDDDDDHOKKKQSTTTNNIIYYFFFFFIDDDDDIYOOIIDAA>DADDFDLLDDDIKKKKOKKKKKKYYYOOJJJOYYYYTOOKKPMMMMMSSSSMMMSSYYYYLJIIIID=====FKKKKKKYYYOOKKIIIIISSFFDIHIIKSSTOOKKKLYYSSKMIIIOOIIIDDDDHDDDIOOIIFFFIIIIKKKMIIIIIIMKKKKIIIFDDDDADDIDDDDDDDHDDDDFFF99///<<HFFFFFFFFGOOYTDDDHHH99,,,95>>>>47//-</3-822.446777BBBFFIOC>6.++-53:?:>7744213...772007:9:-++33>>DH>>??933;;FQ<93/+10++/.//-10234:1//223;:/,,***++'')'+,/)))-.2.++((.0***,))*,0(())''))))+'')***''))***))),669+,*****..''')*,**,*))))*'',)))'(++,++((*+*)*.*))''')***''.*))'')''''''***+)))++**(''''')****)''')'(***''**+/.)))*)')((''***(('')'')-))''''.'')))**'+''''**))''))***+((***)%(((***(((((,.,,(((((*(((+.(()'''')*(())(***((**-+,,)')''*/,''''**'''))((''*+((''''))*))'')'')),.)())'''''('*)**+***-*(')''))((+++0***(('')'')**()++*+**(')).5+*'''')*,---'''')''' +@gnlti136479063 
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTAGCGTGGTCGCGGCCGAGGTACCCTCTACCATGAAACCAGGCTTGGGTCCCTCTGGCTGTCTCTTGGTGCTGATAATCTTACCTTGTGCCTTGGCCTCAGCCTTCAACTTATCGTTCTTCTTGATCCTCTCCATGATCTCCTCATGGCACCTTGATGGCTGGACATGTTCCACACGAACATGAATCCTCTTCCTTATGATTCTGTTTCCAACCTGCTTGTTGACCTCAACACCAACAGCGCGCTTGGTGACGTTCCATACCCGACCCGTGCGCCCATGGTAGAACTTGTGGGGCATACCTTTGTGGATCGACCCGTTAACCTTGACATCAACATAGTCGGCGACTTTGAAGATACGAAGGTAAGTTGTGAGATGGGTAGGACCCTTCTTCCTGAATGGCCGAGCAAATAGATCCCTGGTGCGTGAGCTCAAAGCGTGGACCGGCGGCATTTTAGGGTGTTTTTCAGCTGGCTCGGTGGTTTGAATGTGACTTGGGCGGGGGGGGGGTCGAAAGGCGAATTTGGAGATTTTCATAAAATTGGGGCGGTTGAAGATTGATTTTAAGGGGCAATTTGGGCTATAGGGGTGGTTTTAAATTAATGGGGGTGGTTTAAAAGTGTGATGGGGAAACGTGGGTTACCAATTTATGGGTGTGTGGAGTTCCCTTTGTGAGGTGGTATAGGAAAGGGGGGGCGTGACCTGCCACGTGGGGGGGGAAGTGTATGGGGGCGGGTTGGGGGGTTGAGGGGGGTGTGGGTGTGGGGGTGGGTTTTGTTAGGCGAGGTGGTTTTTTTTTCTTTTTTTTTTTAGTGGAGGTGT ++ +04--46:<<B<<>@>HHEB<822<<IEHIHCCCCCCIIIITTIIIIIINNTTTTTYTIHHHHHHNNNIIDDDDFFNTKM>>?OQFFFMKOOTFDDDDHHIIIIOHFFFFFINTDDAAAADHHDDDOYNNHHFFDDDDDDFDC=AA=DIIIIFFNHHFFFFNNNNNNNNNNDD448DNTOOKKKOBB?DFGGGNOTOO555>>A>>>AAF:::>>@DB=====5AACOIIBCCBB<5005<41''+18EAAAHHHB>96-+,+14:AAIB??>>CD>>;87>5:30-14477<>@CDDD>>?==MQYI>H---88:77:<B>>=33000008<9::>BBBFHHCCC>IFDDDOOOQIQQII:2((+6<552228>DDDEH>>33399>31)''-.FFIMIIIO>>333;@II>71:37<AAEIAA778<B69,,,01BBIKFF>>>944,,,6:6/(((*44<<43,,,66AEH98,,,6/+**--..((*,1><::65/0*'))'(,-,)++*31+((*((**.,+*'')'()'''*++))''('*+26410''''+)(())''))*'(***++*))*((****(''''')++**)+'')*))*.-***))*)*-/****,-30.)''''''''***''''')-.*))'')''**++*))/,,((,+-***+)'''''')+'''.*)')'(0-+((+++)))'''(*+'''')'(**,***''''))*))'')))),''))''))*((***))/()(*''''++**((((((****(')))))*))'),))'')''.)))))))'')+,++')-))'(+))***))''))****++))))+1-**))**'(140''))**))'')+**( +@gnlti136479357 
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTGTGGTCGCGGCCGAGGTACCCTGCACCATGAAACCAGGCTTGGGTCCCTCTGGCTGTCTCTTGGTGCTGATAATCTTACCTTGTGCCTTGGCCTCAGCCTTCAACTTATCGTTCTTCTTGATCCTCTCCATGATCTCCTCATGGCACCTTGATGGCTGGACATGTTCCACACGAACATGAATCCTCTTCCTTATGATTCTGTTTCCAACCTGCTTGTTGACCTCAACACCAACAGCGCGCTTGGTGACGTTCCATACCCGACCCGTGCGGCCATGGTAGAACTTGTGGGGCATACCTTTGTGGATCGACCCGTTAACCTTGACATCAACATAGTCGCCGACTTTGAAGATACGAAGGTAAGTTGTGAGATGGGTAGGACCCTTCTTCCTGAATGCCCGAGCAAATAGATCCCTGGTGCGTGGACTCCAAACGTGAACTTGCCGGGCGGGGGGGAGAGGGGGAGCGGGGGGGGGGGAGAATAAGGGGGGAGGGGAGGGGGAGAGAAAAGGGAGGAGGGGGGGGGTAGGGAGGGAGAGGGAGGGGGGAGGGGGGGGGAGGGGGGGGGGGGAGGGAAGGGGGGGGAGGGGGGGAGGGGGAGGGGAGAGGGGGAGGGGAGGGGGGGGGGAGGGGAGGGGAAGAGGGGGGGGGGGGGGGGGGGAGGGGGGGAGGGGGGGGGGGAGGGGGGGGGGGGGGAGGGAGAAGGAGAAA ++ +.4<BB;>>>>>>>>FDCCCCCCIINIIIDCCCCCCDDDDYQKKFNNNCAAAAAINNIIINTIIHHDDDDDDDDDDDKITTTTLYYYYYLFFIIIILOKKKIIIIKKOKYFDDDDDFIIIIIKKKLLLLTIDDDDDFFDDDDDNNIIIIIKKDDDDHHJOYYSSMMFFADDDDLYSSB>666>BDDDDKOOKJJOOJJED==99AOIJJOOYYYLJJJLLTTTTLYYYYYYYYYYLLIIBBADDNOIIIIIINDAADDDDKOOIIIIIFDDA>7==@@DII??887BBOOFDDDDDIYYNNNHDDKOO?BBHHINODDAF>A>AADFFIIOGFFFFIITOOIDDDDDDDDDDDHHD89,,,<>FFFDD>99<<<B<845;<BAAA;>99=EBIIIIIOOD@@><>AB<8::AA:>AABHIHHHCCC99--+46CCCIIIIAA551-4440++)))4499+))019<>>>1/()0/-('''129.,//+((**+++8@@,*)11))*+***+++))%(,.*)))..,.2+**+8..)))),*))'')))''.''+)*++)+)))''))'(++++*))'''''))****))''))/.03:=741.''**),''''**))))))4**)')'').11.('*))'%)*-.2))*.0('''))(')''))****))('+'')''))****,((((**))1..''))***)1-1-.''),,''))%(.**)(')))*)-().-.***))1)''+''))****))'' |
b |
diff -r 000000000000 -r 6a88b42ce6b9 test-data/tvc_mini_bait_pos.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tvc_mini_bait_pos.fastq Fri Nov 21 06:42:56 2014 -0500 |
b |
@@ -0,0 +1,8 @@ +@gnlti136478626 +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTTCGAGCGGCCGCCCGGGCAGGTCTGAAAAACACCTCAAAATGCCGGCGGGTCACGGTTTGAGGTCACGCACCAGAGATCTATTTGCTCGGGCATTCAGGAAGAAGGGTCCTACCCATCTCACAACTTACCTTCGTATCTTCAAAGTCGGCGACTATGTTGATGTCAAGGTTAACGGGTCGATCCACAAAGGTATGCCCCACAAGTTCTACCATGGCCGCACGGGTCGGGTATGGAACGTCACCAAGCGCGCTGTTGGTGTTGAGGTCAAGCATGAGGTGGGAAACAGAATCATAAGGAAAAGGATTCATGTGCGTGTGGAAGATGGTCAGGGATCGAGGTGCCATGAGGAGATCATGGTGAGGATCGAGAAGAACGATTAAGGGGAAGGCTGAGGGCGAGGGACAAGGTAAGATTATCAGCGACAAGAGACAAGCGGAGGGAGCCACGGCTGGGTGTGTGGTAGAGGGTGCCTCGGGCCGGGGCAAGGCTAAGCCGAATGCTGGGGATATTCATTAGACTGGGGGGCGGTCGAGGGTGGGTCGTAATGGGCCATTTGCGCGTATGGTGGGGTTGTTTGACATTGCGCTGGCCTCGGTTTACAGGGTTGTGATTGGAAAGCCGTGCGGTTGCCAACGTTAGTGTTTGGGAGACGTTCGCGTTCGGGGGCTGGGGTATTAAGGGGGGTCTGGGGTAAGGGCGTGCGAGGATGGTGGAGGGGTTTGGGGTTGGGCGTCTGTTTCGGGGTTTGTGGCGGGGGGTTGGTGGTTGCGTGACGGTGGGGGGGTTGGGCAGGCTATGGCGGGGCGTGGTGTTGGTCTGGTTGTGAGGATGTGAGTGTGCGTTGTTGTGTATTGGGACAGGT ++ +))..28:>C>CDDDDDDCCCCDDD>>A990028>HFFFIIFDDDDDHOTYYNGFAAAA;>??BQQIDDDIDIIIIGMMDDDDDDNIIIGGFFFFIMYKKIKKDD>D>>>C>D>><<<>::..'')46>IIIQIYYYMFDDAADKKKKYYYYYYNNDDDDIIGGKK777MMFFFKDDFAADDDDFKKKKFFFKIDDDDDDKIIIIEMFF=@@@B@BB??>O???OOTTTTLLKKK???DDDD>AAAA>B994B122:=B44/--447<155>>IIFFIKKKGGGGIIN944499C>>>>>>9</--7/00?;33/5/''''))**,.,,,2/0/20004449,,,-,6,--2:G>D>D74-++.15;911**+/-''))****-,''))1.2-.*****-<>71+**()+19:46.--+-*1611+*((****'''''(-/411-1***.((+***('**-8211,-**'''')+,,4,))''))))'')),,(')))).5++))'')).1-+,,.-+(''(++,,,('''))*''''))')+).''))*)-('')+)*((++.+++-*))('))''))+-0./,,))''))'')''*'')))''****.+*''*))'')'')'')))**+++))'''))'''*''*((****'''')'(,(''''''')*''))''))++*(((*((-))'')-)**()******042))***((*))''))*,-.((*)'')%%%)+++****((***-+*)''''))))''''))''''''')))),))***+('))))+.,)()+**''+.-)))(')''))'(***,(((,,***((((**++'')'(*))'''(**'''******((****//--))0+)''))*****))'')%%'')('*)))-(*01**))'(( +@gnlti136479522 
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTTCGAGCGGCCGCCCGGGCAGGTCTGAAAAACACCGCAAAATGCCGGCGGGTCACGGTTTGGGGTCGCGCACCAGAGATCTATTTGCTCGGGCATTCAGGAAGAAGGGTCCTACCCATCTCACAACTTACCTTCGTATCTTCAAAGTCGGCGACTATGTTGATGTCAAGGTTAACGGGTCGATCCACAAAGGTATGCCCCACAAGTTCTATCATGGCCGCACGGGTCGGGTATGGAACGTCACCAAGCGTGCTGTTGGGTGTNGAGGTCAAAAGCAAGTTGGAAACAGAATCATAAGGAAGAGGATTCATGTCCGTGTGGAACATGTCCAGCCATCAAGGTGCCATGAGGAGATCATGGAGAGGATCAAGAAGAACGATTAAGTGAAGGCTGAGGGCAAGGCACAAGGTATGATTATCAGGACCAAGAGACAGGCAGAGGGGACCAAGGCCTGGTTTCATGGGTXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX ++ +(/..2>>H@CACCHICCCCCCIIYTTTFA>>>ADIIIIOTNNNNNIKINIIHDDDIIIHOMMFNNHDDDDDFINIIKOKKKFFIIIITYTLYIIIIIINTTTIFFFFFKLLYYYYYNNNNHHDDDGGNYYYYYYFDGDDDTHIIIIIIIKFFFIIITYINIIIITTTTYYYYTTNNNNNNNIIIIIIIIIFFIOOOOOIFFFFIIINOFKKNND84**+::FFFDHDDDDDIDD>44***49IIIFCIA?94233AIIIMQOOBFF:4-***66CCCCD>>444>?B44*((***45C>@BHIAAAA94%!%44=1-''''))''+(+/,((*245411.40)((+4::79..***-+/.()14BEEIIBCFIIHD88,,,NBID>>A>BB?AAAA>H:::;::4-+,,4/;46,**4841))/1.''*)))+444+++520'')11)(*.+,0**0((*159501224594406652//-/-2,/*1*')+.()./1.01::>>>>5511.4***1:5*((,-/-((******+*-'')((/20-)-,-*++.1/.(())''),351''))'(..280.'')+()**,398..''))**((+1.(())''))**-.--,,**''*)((-)***(()),1,/.1,))))+,+*+*++,-,'''')**((+*++,))))''))**1.,,***+****,+**+++4***.*))'')'')))''))*.5811.--+,+*+*))+,,-..+ |
b |
diff -r 000000000000 -r 6a88b42ce6b9 test-data/tvc_mini_bait_strict.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tvc_mini_bait_strict.fastq Fri Nov 21 06:42:56 2014 -0500 |
b |
@@ -0,0 +1,4 @@ +@gnlti136479522 +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTTCGAGCGGCCGCCCGGGCAGGTCTGAAAAACACCGCAAAATGCCGGCGGGTCACGGTTTGGGGTCGCGCACCAGAGATCTATTTGCTCGGGCATTCAGGAAGAAGGGTCCTACCCATCTCACAACTTACCTTCGTATCTTCAAAGTCGGCGACTATGTTGATGTCAAGGTTAACGGGTCGATCCACAAAGGTATGCCCCACAAGTTCTATCATGGCCGCACGGGTCGGGTATGGAACGTCACCAAGCGTGCTGTTGGGTGTNGAGGTCAAAAGCAAGTTGGAAACAGAATCATAAGGAAGAGGATTCATGTCCGTGTGGAACATGTCCAGCCATCAAGGTGCCATGAGGAGATCATGGAGAGGATCAAGAAGAACGATTAAGTGAAGGCTGAGGGCAAGGCACAAGGTATGATTATCAGGACCAAGAGACAGGCAGAGGGGACCAAGGCCTGGTTTCATGGGTXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX ++ +(/..2>>H@CACCHICCCCCCIIYTTTFA>>>ADIIIIOTNNNNNIKINIIHDDDIIIHOMMFNNHDDDDDFINIIKOKKKFFIIIITYTLYIIIIIINTTTIFFFFFKLLYYYYYNNNNHHDDDGGNYYYYYYFDGDDDTHIIIIIIIKFFFIIITYINIIIITTTTYYYYTTNNNNNNNIIIIIIIIIFFIOOOOOIFFFFIIINOFKKNND84**+::FFFDHDDDDDIDD>44***49IIIFCIA?94233AIIIMQOOBFF:4-***66CCCCD>>444>?B44*((***45C>@BHIAAAA94%!%44=1-''''))''+(+/,((*245411.40)((+4::79..***-+/.()14BEEIIBCFIIHD88,,,NBID>>A>BB?AAAA>H:::;::4-+,,4/;46,**4841))/1.''*)))+444+++520'')11)(*.+,0**0((*159501224594406652//-/-2,/*1*')+.()./1.01::>>>>5511.4***1:5*((,-/-((******+*-'')((/20-)-,-*++.1/.(())''),351''))'(..280.'')+()**,398..''))**((+1.(())''))**-.--,,**''*)((-)***(()),1,/.1,))))+,+*+*++,-,'''')**((+*++,))))''))**1.,,***+****,+**+++4***.*))'')'')))''))*.5811.--+,+*+*))+,,-..+ |
b |
diff -r 000000000000 -r 6a88b42ce6b9 tools/mira4/README.rst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/mira4/README.rst Fri Nov 21 06:42:56 2014 -0500 |
b |
@@ -0,0 +1,165 @@ +Galaxy wrapper for the MIRA assembly program (v4.0) +=================================================== + +This tool is copyright 2011-2014 by Peter Cock, The James Hutton Institute +(formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved. +See the licence text below (MIT licence). + +This tool is a short Python script (to collect the MIRA output and move it +to where Galaxy expects the files) and associated Galaxy wrapper XML file. + +It is available from the Galaxy Tool Shed at: +http://toolshed.g2.bx.psu.edu/view/peterjc/mira4_assembler + +It uses a Galaxy datatype definition 'mira' for the MIRA Assembly Format, +http://toolshed.g2.bx.psu.edu/view/peterjc/mira_datatypes + +A separate wrapper for MIRA v3.4 is available from the Galaxy Tool Shed at: +http://toolshed.g2.bx.psu.edu/view/peterjc/mira_assembler + +Automated Installation +====================== + +This should be straightforward. Via the Tool Shed, Galaxy should automatically +install the 'mira' datatype, samtools, and download and install the precompiled +binary for MIRA v4.0.2 for the Galaxy wrapper, and run any tests. + +For MIRA 4, the Galaxy wrapper has been split in two, allowing separate +cluster settings for de novo usage (high RAM) and mapping (lower RAM). +Consult the Galaxy administration documentation for your cluster setup. + +WARNING: For larger tasks, be aware that MIRA can require vast amounts +of RAM and run-times of over a week are possible. This tool wrapper makes +no attempt to spot and reject such large jobs. 
+ + +Manual Installation +=================== + +First install the 'mira' datatype for Galaxy, available here: + +* http://toolshed.g2.bx.psu.edu/view/peterjc/mira_datatypes + +There are eight Galaxy files to install: + +* ``mira4_de_novo.xml`` (the Galaxy tool definition for de novo usage) +* ``mira4_mapping.xml`` (the Galaxy tool definition for mapping usage) +* ``mira4_convert.xml`` (the Galaxy tool definition for converting MIRA files) +* ``mira4_bait.xml`` (the Galaxy tool definition for mirabait) +* ``mira4.py`` (the Python wrapper script) +* ``mira4_convert.py`` (the Python wrapper script for miraconvert) +* ``mira4_bait.py`` (the Python wrapper script for mirabait) +* ``mira4_validator.py`` (the XML parameter validation script) + +The suggested location is a new ``tools/mira4`` folder. You will also need to +modify the ``tool_conf.xml`` file to tell Galaxy to offer the tool, and also do +this to ``tool_conf.xml.sample`` in order to run the tests:: + + <tool file="mira4/mira4_de_novo.xml" /> + <tool file="mira4/mira4_mapping.xml" /> + +You will also need to install MIRA (we used version 4.0.2) and define the +environment variable ``$MIRA4`` pointing at the folder containing the binaries. +See: + +* http://chevreux.org/projects_mira.html +* http://sourceforge.net/projects/mira-assembler/ + +You may wish to use different cluster setups for the de novo and mapping +tools, see above. + +You will also need to install samtools (for generating a BAM file from MIRA's +SAM output). 
+ +After copying (or symlinking) the ``test-data`` files under Galaxy's ``test-data`` +folder, you can run the tests with:: + + $ ./run_functional_tests.sh -id mira_4_0_bait + $ ./run_functional_tests.sh -id mira_4_0_de_novo + $ ./run_functional_tests.sh -id mira_4_0_mapping + $ ./run_functional_tests.sh -id mira_4_0_convert + + +History +======= + +======= ====================================================================== +Version Changes +------- ---------------------------------------------------------------------- +v0.0.1 - Initial version (prototype for MIRA 4.0 RC4, based on wrapper for v3.4) +v0.0.2 - Include BAM output (using ``miraconvert`` and ``samtools``). + - Updated to target MIRA 4.0.1 + - Simplified XML to apply input format to output data. + - Sets temporary folder at run time to respect environment variables + (``$TMPDIR``, ``$TEMP``, or ``$TMP`` in that order). This was + previously hard coded as ``/tmp``. +v0.0.3 - Updated to target MIRA 4.0.2 +v0.0.4 - Using optparse for the Python wrapper script API + - Made MAF and BAM outputs optional + - Include wrapper for ``miraconvert`` +======= ====================================================================== + + +Developers +========== + +Development is on a dedicated GitHub repository: +https://github.com/peterjc/pico_galaxy/tree/master/tools/mira4 + +For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball use +the following command from the Galaxy root folder:: + + $ tar -czf mira4_wrapper.tar.gz tools/mira4/README.rst tools/mira4/mira4_de_novo.xml tools/mira4/mira4_mapping.xml tools/mira4/mira4_bait.xml tools/mira4/mira4_convert.xml tools/mira4/mira4.py tools/mira4/mira4_make_bam.py tools/mira4/mira4_validator.py tools/mira4/mira4_convert.py tools/mira4/mira4_bait.py tools/mira4/tool_dependencies.xml tools/mira4/repository_dependencies.xml test-data/U13small_m.fastq test-data/U13small_m.mira4_de_novo.fasta test-data/tvc_mini.fastq test-data/tvc_contigs.fasta 
test-data/tvc_map_ref_strain.fasta test-data/tvc_map_same_strain.fasta test-data/tvc_bait.fasta test-data/tvc_mini_bait_pos.fastq test-data/tvc_mini_bait_strict.fastq test-data/tvc_mini_bait_neg.fastq test-data/ecoli.fastq test-data/ecoli.mira4_de_novo.fasta test-data/header.mira test-data/empty_file.dat + +Check this worked:: + + $ tar -tzf mira4_wrapper.tar.gz + tools/mira4/README.rst + tools/mira4/mira4_de_novo.xml + tools/mira4/mira4_mapping.xml + tools/mira4/mira4_bait.xml + tools/mira4/mira4_convert.xml + tools/mira4/mira4.py + tools/mira4/mira4_make_bam.py + tools/mira4/mira4_validator.py + tools/mira4/mira4_convert.py + tools/mira4/mira4_bait.py + tools/mira4/tool_dependencies.xml + tools/mira4/repository_dependencies.xml + test-data/U13small_m.fastq + test-data/U13small_m.mira4_de_novo.fasta + test-data/tvc_mini.fastq + test-data/tvc_contigs.fasta + test-data/tvc_map_ref_strain.fasta + test-data/tvc_map_same_strain.fasta + test-data/tvc_bait.fasta + test-data/tvc_mini_bait_pos.fastq + test-data/tvc_mini_bait_strict.fastq + test-data/tvc_mini_bait_neg.fastq + test-data/ecoli.fastq + test-data/ecoli.mira4_de_novo.fasta + test-data/header.mira + test-data/empty_file.dat + + + +Licence (MIT) +============= + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. |
b |
diff -r 000000000000 -r 6a88b42ce6b9 tools/mira4/mira4.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/mira4/mira4.py Fri Nov 21 06:42:56 2014 -0500 |
[ |
b'@@ -0,0 +1,313 @@\n+#!/usr/bin/env python\n+"""A simple wrapper script to call MIRA and collect its output.\n+"""\n+import os\n+import sys\n+import subprocess\n+import shutil\n+import time\n+import tempfile\n+from optparse import OptionParser\n+\n+#Do we need any PYTHONPATH magic?\n+from mira4_make_bam import make_bam\n+\n+WRAPPER_VER = "0.0.4" #Keep in sync with the XML file\n+\n+def stop_err(msg, err=1):\n+ sys.stderr.write(msg+"\\n")\n+ sys.exit(err)\n+\n+\n+def get_version(mira_binary):\n+ """Run MIRA to find its version number"""\n+ # At the commend line I would use: mira -v | head -n 1\n+ # however there is some pipe error when doing that here.\n+ cmd = [mira_binary, "-v"]\n+ try:\n+ child = subprocess.Popen(cmd,\n+ stdout=subprocess.PIPE,\n+ stderr=subprocess.STDOUT)\n+ except Exception, err:\n+ sys.stderr.write("Error invoking command:\\n%s\\n\\n%s\\n" % (" ".join(cmd), err))\n+ sys.exit(1)\n+ ver, tmp = child.communicate()\n+ del child\n+ return ver.split("\\n", 1)[0].strip()\n+\n+#Parse Command Line\n+usage = """Galaxy MIRA4 wrapper script v%s - use as follows:\n+\n+$ python mira4.py ...\n+\n+This will run the MIRA binary and collect its output files as directed.\n+""" % WRAPPER_VER\n+parser = OptionParser(usage=usage)\n+parser.add_option("-m", "--manifest", dest="manifest",\n+ default=None, metavar="FILE",\n+ help="MIRA manifest filename")\n+parser.add_option("--maf", dest="maf",\n+ default="-", metavar="FILE",\n+ help="MIRA MAF output filename")\n+parser.add_option("--bam", dest="bam",\n+ default="-", metavar="FILE",\n+ help="Unpadded BAM output filename")\n+parser.add_option("--fasta", dest="fasta",\n+ default="-", metavar="FILE",\n+ help="Unpadded FASTA output filename")\n+parser.add_option("--log", dest="log",\n+ default="-", metavar="FILE",\n+ help="MIRA logging output filename")\n+parser.add_option("-v", "--version", dest="version",\n+ default=False, action="store_true",\n+ help="Show version and quit")\n+options, args = 
parser.parse_args()\n+manifest = options.manifest\n+out_maf = options.maf\n+out_bam = options.bam\n+out_fasta = options.fasta\n+out_log = options.log\n+\n+try:\n+ mira_path = os.environ["MIRA4"]\n+except KeyError:\n+ stop_err("Environment variable $MIRA4 not set")\n+mira_binary = os.path.join(mira_path, "mira")\n+if not os.path.isfile(mira_binary):\n+ stop_err("Missing mira under $MIRA4, %r\\nFolder contained: %s"\n+ % (mira_binary, ", ".join(os.listdir(mira_path))))\n+mira_convert = os.path.join(mira_path, "miraconvert")\n+if not os.path.isfile(mira_convert):\n+ stop_err("Missing miraconvert under $MIRA4, %r\\nFolder contained: %s"\n+ % (mira_convert, ", ".join(os.listdir(mira_path))))\n+\n+mira_ver = get_version(mira_binary)\n+if not mira_ver.strip().startswith("4.0"):\n+ stop_err("This wrapper is for MIRA V4.0, not:\\n%s\\n%s" % (mira_ver, mira_binary))\n+mira_convert_ver = get_version(mira_convert)\n+if not mira_convert_ver.strip().startswith("4.0"):\n+ stop_err("This wrapper is for MIRA V4.0, not:\\n%s\\n%s" % (mira_ver, mira_convert))\n+if options.version:\n+ print "%s, MIRA wrapper version %s" % (mira_ver, WRAPPER_VER)\n+ if mira_ver != mira_convert_ver:\n+ print "WARNING: miraconvert %s" % mira_convert_ver\n+ sys.exit(0)\n+\n+if not manifest:\n+ stop_err("Manifest is required")\n+elif not os.path.isfile(manifest):\n+ stop_err("Missing input MIRA manifest file: %r" % manifest)\n+\n+\n+try:\n+ threads = int(os.environ.get("GALAXY_SLOTS", "1"))\n+except ValueError:\n+ threads = 1\n+assert 1 <= threads, threads\n+\n+\n+def override_temp(manifest):\n+ """Override ``-DI:trt=/tmp`` in manifest with environment variable.\n+\n+ Currently MIRA 4 does not allow envronment variables like ``$TMP``\n+ inside the manifest, which'..b't_maf, ref_fasta, out_bam, handle)\n+ else:\n+ #Not collecting the MAF file, use original location \n+ msg = make_bam(mira_convert, old_maf, ref_fasta, out_bam, handle)\n+ if msg:\n+ stop_err(msg)\n+\n+def clean_up(temp, name):\n+ folder = 
"%s/%s_assembly" % (temp, name)\n+ if os.path.isdir(folder):\n+ shutil.rmtree(folder)\n+\n+#TODO - Run MIRA in /tmp or a configurable directory?\n+#Currently Galaxy puts us somewhere safe like:\n+#/opt/galaxy-dist/database/job_working_directory/846/\n+temp = "."\n+\n+name = "MIRA"\n+\n+override_temp(manifest)\n+\n+start_time = time.time()\n+cmd_list = [mira_binary, "-t", str(threads), manifest]\n+cmd = " ".join(cmd_list)\n+\n+assert os.path.isdir(temp)\n+d = "%s_assembly" % name\n+#This can fail on my development machine if stale folders exist\n+#under Galaxy\'s .../database/job_working_directory/ tree:\n+assert not os.path.isdir(d), "Path %r already exists:\\n%s" % (d, os.path.abspath(d))\n+try:\n+ #Check path access\n+ os.mkdir(d)\n+except Exception, err:\n+ log_manifest(manifest)\n+ sys.stderr.write("Error making directory %s\\n%s" % (d, err))\n+ sys.exit(1)\n+\n+#print os.path.abspath(".")\n+#print cmd\n+\n+if out_log and out_log != "-":\n+ handle = open(out_log, "w")\n+else:\n+ handle = open(os.devnull, "w")\n+handle.write("======================== MIRA manifest (instructions) ========================\\n")\n+m = open(manifest, "rU")\n+for line in m:\n+ handle.write(line)\n+m.close()\n+del m\n+handle.write("\\n")\n+handle.write("============================ Starting MIRA now ===============================\\n")\n+handle.flush()\n+try:\n+ #Run MIRA\n+ child = subprocess.Popen(cmd_list,\n+ stdout=handle,\n+ stderr=subprocess.STDOUT)\n+except Exception, err:\n+ log_manifest(manifest)\n+ sys.stderr.write("Error invoking command:\\n%s\\n\\n%s\\n" % (cmd, err))\n+ #TODO - call clean up?\n+ handle.write("Error invoking command:\\n%s\\n\\n%s\\n" % (cmd, err))\n+ handle.close()\n+ sys.exit(1)\n+#Use .communicate as can get deadlocks with .wait(),\n+stdout, stderr = child.communicate()\n+assert not stdout and not stderr #Should be empty as sent to handle\n+run_time = time.time() - start_time\n+return_code = 
child.returncode\n+handle.write("\\n")\n+handle.write("============================ MIRA has finished ===============================\\n")\n+handle.write("MIRA took %0.2f hours\\n" % (run_time / 3600.0))\n+if return_code:\n+ print "MIRA took %0.2f hours" % (run_time / 3600.0)\n+ handle.write("Return error code %i from command:\\n" % return_code)\n+ handle.write(cmd + "\\n")\n+ handle.close()\n+ clean_up(temp, name)\n+ log_manifest(manifest)\n+ stop_err("Return error code %i from command:\\n%s" % (return_code, cmd),\n+ return_code)\n+handle.flush()\n+\n+if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"):\n+ handle.write("\\n")\n+ handle.write("====================== Extract Large Contigs failed ==========================\\n")\n+ e = open("MIRA_assembly/MIRA_d_results/ec.log", "rU")\n+ for line in e:\n+ handle.write(line)\n+ e.close()\n+ handle.write("============================ (end of ec.log) =================================\\n")\n+ handle.flush()\n+\n+#print "Collecting output..."\n+start_time = time.time()\n+collect_output(temp, name, handle)\n+collect_time = time.time() - start_time\n+handle.write("MIRA took %0.2f hours; collecting output %0.2f minutes\\n" % (run_time / 3600.0, collect_time / 60.0))\n+print("MIRA took %0.2f hours; collecting output %0.2f minutes\\n" % (run_time / 3600.0, collect_time / 60.0))\n+\n+if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"):\n+ #Treat as an error, but doing this AFTER collect_output\n+ sys.stderr.write("Extract Large Contigs failed\\n")\n+ handle.write("Extract Large Contigs failed\\n")\n+ handle.close()\n+ sys.exit(1)\n+\n+#print "Cleaning up..."\n+clean_up(temp, name)\n+\n+handle.write("\\nDone\\n")\n+handle.close()\n+print("Done")\n' |
b |
diff -r 000000000000 -r 6a88b42ce6b9 tools/mira4/mira4_bait.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/mira4/mira4_bait.py Fri Nov 21 06:42:56 2014 -0500 |
[ |
#!/usr/bin/env python
"""A simple wrapper script to call MIRA4's mirabait and collect its output.

Expects eight positional arguments, in this order: input format, output
choice (pos/neg), strand choice (both/fwd), k-mer length, minimum k-mer
occurrence, bait filename, input reads filename and output filename.

Alternatively pass ``-v`` or ``--version`` to report the mirabait version
and the wrapper version, then exit.
"""
import os
import sys
import subprocess
import shutil
import time

# NOTE(review): the accompanying mira4_bait.xml declares version="0.0.3";
# confirm which value is intended before changing either.
WRAPPER_VER = "0.0.1" #Keep in sync with the XML file


def stop_err(msg, err=1):
    """Write msg (plus a newline) to stderr and exit with return code err."""
    sys.stderr.write(msg+"\n")
    sys.exit(err)


def get_version(mira_binary):
    """Run the given MIRA binary with -v and return its version string.

    Exits via stop_err if the binary cannot be invoked, or if the
    "invalid option" fallback cannot find a version number in the output.
    """
    # At the command line I would use: mira -v | head -n 1
    # however there is some pipe error when doing that here.
    cmd = [mira_binary, "-v"]
    try:
        # universal_newlines=True so the captured output is text
        child = subprocess.Popen(cmd,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT,
                                 universal_newlines=True)
    except Exception as err:
        stop_err("Error invoking command:\n%s\n\n%s" % (" ".join(cmd), err))
    ver, tmp = child.communicate()
    del child
    lines = ver.split("\n")
    #Workaround for -v not working in mirabait 4.0RC4
    if "invalid option" in lines[0]:
        # Scan every output line (not just two chunks) for "... version X)"
        for line in lines:
            if " version " in line:
                words = line.split()
                return words[words.index("version")+1].rstrip(")")
        stop_err("Could not determine MIRA version:\n%s" % ver)
    return lines[0].strip()


try:
    mira_path = os.environ["MIRA4"]
except KeyError:
    stop_err("Environment variable $MIRA4 not set")
mira_binary = os.path.join(mira_path, "mirabait")
if not os.path.isfile(mira_binary):
    stop_err("Missing mirabait under $MIRA4, %r\nFolder contained: %s"
             % (mira_binary, ", ".join(os.listdir(mira_path))))
mira_ver = get_version(mira_binary)
if not mira_ver.strip().startswith("4.0"):
    stop_err("This wrapper is for MIRA V4.0, not:\n%s" % mira_ver)
if "-v" in sys.argv or "--version" in sys.argv:
    print("%s, MIRA wrapper version %s" % (mira_ver, WRAPPER_VER))
    sys.exit(0)


# Validate the argument count up front rather than failing with a bare
# ValueError traceback from the tuple unpacking.
args = sys.argv[1:]
if len(args) != 8:
    stop_err("Expected 8 arguments (format, output choice, strand choice, "
             "k-mer length, minimum occurrence, bait file, input file, "
             "output file), got %i" % len(args))
(seq_format, output_choice, strand_choice, kmer_length, min_occurrence,
 bait_file, in_file, out_file) = args

# Map the Galaxy datatype name onto the format name passed to mirabait
if seq_format.startswith("fastq"):
    seq_format = "fastq"
elif seq_format == "mira":
    seq_format = "maf"
elif seq_format != "fasta":
    stop_err("Was not expected format %r" % seq_format)

# mirabait is given an output stem; the resulting file is moved to out_file
if not out_file.endswith(".dat"):
    stop_err("Expected output filename ending .dat, not %r" % out_file)
out_file_stem = out_file[:-4]

cmd_list = [mira_binary, "-f", seq_format, "-t", seq_format,
            "-k", kmer_length, "-n", min_occurrence,
            bait_file, in_file, out_file_stem]
if output_choice == "pos":
    pass
elif output_choice == "neg":
    #Invert the selection...
    cmd_list.insert(1, "-i")
else:
    stop_err("Output choice should be 'pos' or 'neg', not %r" % output_choice)
if strand_choice == "both":
    pass
elif strand_choice == "fwd":
    #Ignore reverse strand...
    cmd_list.insert(1, "-r")
else:
    stop_err("Strand choice should be 'both' or 'fwd', not %r" % strand_choice)

cmd = " ".join(cmd_list)
#print cmd
start_time = time.time()
try:
    #Run MIRA
    child = subprocess.Popen(cmd_list,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT,
                             universal_newlines=True)
except Exception as err:
    # (There is no manifest for mirabait, so nothing further to log here.)
    stop_err("Error invoking command:\n%s\n\n%s" % (cmd, err))
#Use .communicate as can get deadlocks with .wait(),
stdout, stderr = child.communicate()
assert stderr is None # Due to way we ran with subprocess
run_time = time.time() - start_time
return_code = child.returncode
print("mirabait took %0.2f minutes" % (run_time / 60.0))

if return_code:
    sys.stderr.write(stdout)
    stop_err("Return error code %i from command:\n%s" % (return_code, cmd),
             return_code)

#Capture output
out_tmp = out_file_stem + "." + seq_format
if not os.path.isfile(out_tmp):
    sys.stderr.write(stdout)
    stop_err("Missing output file from mirabait: %s" % out_tmp)
shutil.move(out_tmp, out_file)
b |
diff -r 000000000000 -r 6a88b42ce6b9 tools/mira4/mira4_bait.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/mira4/mira4_bait.xml Fri Nov 21 06:42:56 2014 -0500 |
b |
@@ -0,0 +1,112 @@ +<tool id="mira_4_0_bait" name="MIRA v4.0 mirabait" version="0.0.3"> + <description>Filter reads using kmer matches</description> + <requirements> + <requirement type="binary">mirabait</requirement> + <requirement type="package" version="4.0">MIRA</requirement> + </requirements> + <version_command interpreter="python">mira4_bait.py --version</version_command> + <command interpreter="python"> +mira4_bait.py $input_reads.ext $output_choice $strand_choice $kmer_length $min_occurence "$bait_file" "$input_reads" "$output_reads" + </command> + <stdio> + <!-- Assume anything other than zero is an error --> + <exit_code range="1:" /> + <exit_code range=":-1" /> + </stdio> + <inputs> + <param name="bait_file" type="data" format="fasta,fastq,mira" required="true" label="Bait file (what to look for)" /> + <param name="input_reads" type="data" format="fasta,fastq,mira" required="true" label="Reads to search" /> + <param name="output_choice" type="select" label="Output positive matches, or negative matches?"> + <option value="pos">Just positive matches</option> + <option value="neg">Just negative matches</option> + </param> + <param name="strand_choice" type="select" label="Check for matches on both strands?"> + <option value="both">Check both strands</option> + <option value="fwd">Just forward strand</option> + </param> + <param name="kmer_length" type="integer" value="31" min="1" max="32" + label="k-mer length" help="Maximum 32" /> + <param name="min_occurence" type="integer" value="1" min="1" + label="Minimum k-mer occurence" + help="How many k-mer matches do you want per read? 
Minimum one" /> + </inputs> + <outputs> + <data name="output_reads" format="input" metadata_source="input_reads" + label="$input_reads.name #if str($output_choice)=='pos' then 'matching' else 'excluding matches to' # $bait_file.name"/> + </outputs> + <tests> + <test> + <param name="bait_file" value="tvc_bait.fasta" ftype="fasta" /> + <param name="input_reads" value="tvc_mini.fastq" ftype="fastqsanger" /> + <output name="output_reads" file="tvc_mini_bait_pos.fastq" ftype="fastqsanger" /> + </test> + <test> + <param name="bait_file" value="tvc_bait.fasta" ftype="fasta" /> + <param name="input_reads" value="tvc_mini.fastq" ftype="fastqsanger" /> + <param name="kmer_length" value="32" /> + <param name="min_occurence" value="50" /> + <output name="output_reads" file="tvc_mini_bait_strict.fastq" ftype="fastqsanger" /> + </test> + <test> + <param name="bait_file" value="tvc_bait.fasta" ftype="fasta" /> + <param name="input_reads" value="tvc_mini.fastq" ftype="fastqsanger" /> + <param name="output_choice" value="neg" /> + <output name="output_reads" file="tvc_mini_bait_neg.fastq" ftype="fastqsanger" /> + </test> + </tests> + <help> +**What it does** + +Runs the ``mirabait`` utility from MIRA v4.0 to filter your input reads +according to whether or not they contain perfect kmer matches to your +bait file. By default this looks for 31-mers (kmers or *k*-mers where +the fragment length *k* is 31), and only requires a single matching kmer. + +The ``mirabait`` utility is useful in many applications and pipelines +outside of using the main MIRA tool for assembly or mapping. + +.. class:: warningmark + +Note ``mirabait`` cannot be used on protein (amino acid) sequences. + +**Example Usage** + +To remove over abundant entries like rRNA sequences, run ``mirabait`` with +known rRNA sequences as the bait and select the *negative* matches. 
+ +To do targeted assembly by fishing out reads belonging to a gene and just +assemble these, run ``mirabait`` with the gene of interest as the bait and +select the *positive* matches. + +To iteratively reconstruct mitochondria you could start by fishing out reads +matching any known mitochondrial sequence, assemble those, and repeat. + + +**Notes on paired reads** + +.. class:: warningmark + +While MIRA4 is aware of many read naming conventions to identify paired read +partners, the ``mirabait`` tool considers each read in isolation. Applying +it to paired read files may leave you with orphaned reads. + + +**Citation** + +If you use this Galaxy tool in work leading to a scientific publication please +cite the following papers: + +Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). +Galaxy tools and workflows for sequence analysis with applications +in molecular plant pathology. PeerJ 1:e167 +http://dx.doi.org/10.7717/peerj.167 + +Bastien Chevreux, Thomas Wetter and Sándor Suhai (1999). +Genome Sequence Assembly Using Trace Signals and Additional Sequence Information. +Computer Science and Biology: Proceedings of the German Conference on Bioinformatics (GCB) 99, pp. 45-56. +http://www.bioinfo.de/isb/gcb99/talks/chevreux/main.html + +This wrapper is available to install into other Galaxy Instances via the Galaxy +Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/mira4_assembler + </help> +</tool> |
b |
diff -r 000000000000 -r 6a88b42ce6b9 tools/mira4/mira4_convert.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/mira4/mira4_convert.py Fri Nov 21 06:42:56 2014 -0500 |
[ |
#!/usr/bin/env python
"""A simple wrapper script to call MIRA and collect its output.

This focuses on the miraconvert binary.
"""
import os
import sys
import subprocess
import shutil
import time
import tempfile
from optparse import OptionParser
try:
    from io import BytesIO
except ImportError:
    #Should we worry about Python 2.5 or older?
    from StringIO import StringIO as BytesIO

#Do we need any PYTHONPATH magic?
from mira4_make_bam import depad

WRAPPER_VER = "0.0.5" #Keep in sync with the XML file


def stop_err(msg, err=1):
    """Write msg (plus a newline) to stderr and exit with return code err."""
    sys.stderr.write(msg+"\n")
    sys.exit(err)


def run(cmd):
    """Run cmd (a list of arguments); exit via stop_err on any failure.

    On a non-zero return code the error message includes the command line
    and whatever the child wrote to stdout/stderr.
    """
    #Avoid using shell=True when we call subprocess to ensure if the Python
    #script is killed, so too is the child process.
    cmd_str = " ".join(cmd)
    try:
        child = subprocess.Popen(cmd,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 universal_newlines=True)
    except Exception as err:
        stop_err("Error invoking command:\n%s\n\n%s\n" % (cmd_str, err))
    #Use .communicate as can get deadlocks with .wait(),
    stdout, stderr = child.communicate()
    return_code = child.returncode
    if return_code:
        # Report the command itself here; the exception variable from the
        # try block above is not defined when Popen succeeded.
        if stderr and stdout:
            stop_err("Return code %i from command:\n%s\n\n%s\n\n%s"
                     % (return_code, cmd_str, stdout, stderr))
        else:
            stop_err("Return code %i from command:\n%s\n%s"
                     % (return_code, cmd_str, stderr))


def get_version(mira_binary):
    """Run the given MIRA binary with -v; return the first output line."""
    # At the command line I would use: mira -v | head -n 1
    # however there is some pipe error when doing that here.
    cmd = [mira_binary, "-v"]
    try:
        child = subprocess.Popen(cmd,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT,
                                 universal_newlines=True)
    except Exception as err:
        stop_err("Error invoking command:\n%s\n\n%s" % (" ".join(cmd), err))
    ver, tmp = child.communicate()
    del child
    return ver.split("\n", 1)[0].strip()


#Parse Command Line
usage = """Galaxy MIRA4 wrapper script v%s - use as follows:

$ python mira4_convert.py ...

This will run the MIRA miraconvert binary and collect its output files as directed.
""" % WRAPPER_VER
parser = OptionParser(usage=usage)
parser.add_option("--input", dest="input",
                  default=None, metavar="FILE",
                  help="MIRA input filename")
parser.add_option("-x", "--min_length", dest="min_length",
                  default="0",
                  help="Minimum contig length")
parser.add_option("-y", "--min_cover", dest="min_cover",
                  default="0",
                  help="Minimum average contig coverage")
parser.add_option("-z", "--min_reads", dest="min_reads",
                  default="0",
                  help="Minimum reads per contig")
parser.add_option("--maf", dest="maf",
                  default="", metavar="FILE",
                  help="MIRA MAF output filename")
parser.add_option("--ace", dest="ace",
                  default="", metavar="FILE",
                  help="ACE output filename")
parser.add_option("--bam", dest="bam",
                  default="", metavar="FILE",
                  help="Unpadded BAM output filename")
parser.add_option("--fasta", dest="fasta",
                  default="", metavar="FILE",
                  help="Unpadded FASTA output filename")
parser.add_option("--cstats", dest="cstats",
                  default="", metavar="FILE",
                  help="Contig statistics filename")
parser.add_option("-v", "--version", dest="version",
                  default=False, action="store_true",
                  help="Show version and quit")
options, args = parser.parse_args()
if args:
    stop_err("Expected options (e.g. --input example.maf), not arguments")

input_maf = options.input
out_maf = options.maf
out_bam = options.bam
out_fasta = options.fasta
out_ace = options.ace
out_cstats = options.cstats

try:
    mira_path = os.environ["MIRA4"]
except KeyError:
    stop_err("Environment variable $MIRA4 not set")
mira_convert = os.path.join(mira_path, "miraconvert")
if not os.path.isfile(mira_convert):
    stop_err("Missing miraconvert under $MIRA4, %r\nFolder contained: %s"
             % (mira_convert, ", ".join(os.listdir(mira_path))))

mira_convert_ver = get_version(mira_convert)
if not mira_convert_ver.strip().startswith("4.0"):
    # Report the version string we actually obtained from miraconvert
    stop_err("This wrapper is for MIRA V4.0, not:\n%s\n%s"
             % (mira_convert_ver, mira_convert))
if options.version:
    print("%s, MIRA wrapper version %s" % (mira_convert_ver, WRAPPER_VER))
    sys.exit(0)

if not input_maf:
    stop_err("Input MIRA file is required")
elif not os.path.isfile(input_maf):
    stop_err("Missing input MIRA file: %r" % input_maf)

if not (out_maf or out_bam or out_fasta or out_ace or out_cstats):
    stop_err("No output requested")


def check_min_int(value, name):
    """Return value as a non-negative int, or exit via stop_err.

    name is only used to describe the setting in the error message.
    """
    try:
        i = int(value)
    except (TypeError, ValueError):
        # Only conversion failures; do not swallow SystemExit etc.
        stop_err("Bad %s setting, %r" % (name, value))
    if i < 0:
        stop_err("Negative %s setting, %r" % (name, value))
    return i

min_length = check_min_int(options.min_length, "minimum length")
min_cover = check_min_int(options.min_cover, "minimum cover")
min_reads = check_min_int(options.min_reads, "minimum reads")

#TODO - Run MIRA in /tmp or a configurable directory?
#Currently Galaxy puts us somewhere safe like:
#/opt/galaxy-dist/database/job_working_directory/846/
temp = "."


# Build the miraconvert command: optional filters, the input MAF, an output
# stem of "converted", then one positional format name per requested output.
cmd_list = [mira_convert]
if min_length:
    cmd_list.extend(["-x", str(min_length)])
if min_cover:
    cmd_list.extend(["-y", str(min_cover)])
if min_reads:
    cmd_list.extend(["-z", str(min_reads)])
cmd_list.extend(["-f", "maf", input_maf, os.path.join(temp, "converted")])
if out_maf:
    cmd_list.append("maf")
if out_bam:
    cmd_list.append("samnbb")
    if not out_fasta:
        #Need this for samtools depad
        out_fasta = os.path.join(temp, "depadded.fasta")
if out_fasta:
    cmd_list.append("fasta")
if out_ace:
    cmd_list.append("ace")
if out_cstats:
    cmd_list.append("cstats")
run(cmd_list)


def collect(old, new):
    """Move output file old to new, exiting via stop_err if old is missing."""
    if not os.path.isfile(old):
        stop_err("Missing expected output file %s" % old)
    shutil.move(old, new)

if out_maf:
    collect(os.path.join(temp, "converted.maf"), out_maf)
if out_fasta:
    #Can we look at the MAF file to see if there are multiple strains?
    old = os.path.join(temp, "converted_AllStrains.unpadded.fasta")
    if os.path.isfile(old):
        collect(old, out_fasta)
    else:
        #Might the output be filtered down to zero contigs?
        old = os.path.join(temp, "converted.fasta")
        if not os.path.isfile(old):
            stop_err("Missing expected output FASTA file")
        elif os.path.getsize(old) == 0:
            print("Warning - no contigs (harsh filters?)")
            collect(old, out_fasta)
        else:
            stop_err("Missing expected output FASTA file (only generic file present)")
if out_ace:
    # Previously this tried to collect converted.maf, which is the MAF
    # output (and may already have been moved away above).
    # NOTE(review): assumes miraconvert names its ACE output
    # "converted.ace" for the "converted" stem - confirm against MIRA 4.
    collect(os.path.join(temp, "converted.ace"), out_ace)
if out_cstats:
    collect(os.path.join(temp, "converted_info_contigstats.txt"), out_cstats)

if out_bam:
    assert os.path.isfile(out_fasta)
    old = os.path.join(temp, "converted.samnbb")
    if not os.path.isfile(old):
        old = os.path.join(temp, "converted.sam")
    if not os.path.isfile(old):
        stop_err("Missing expected intermediate file %s" % old)
    # depad wants a log handle; buffer its log and only show it on failure
    h = BytesIO()
    msg = depad(out_fasta, old, out_bam, h)
    if msg:
        print(msg)
        print(h.getvalue())
        h.close()
        sys.exit(1)
    h.close()
    if out_fasta == os.path.join(temp, "depadded.fasta"):
        #Not asked for by Galaxy, no longer needed
        os.remove(out_fasta)

if min_length or min_cover or min_reads:
    print("Filtered.")
else:
    print("Converted.")
b |
diff -r 000000000000 -r 6a88b42ce6b9 tools/mira4/mira4_convert.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/mira4/mira4_convert.xml Fri Nov 21 06:42:56 2014 -0500 |
b |
@@ -0,0 +1,114 @@ +<tool id="mira_4_0_convert" name="MIRA v4.0 miraconvert" version="0.0.5"> + <description>Convert MIRA assembly to FASTA/SAM/BAM</description> + <requirements> + <requirement type="binary">miraconvert</requirement> + <requirement type="package" version="4.0">MIRA</requirement> + </requirements> + <version_command interpreter="python">mira4_convert.py --version</version_command> + <command interpreter="python"> +mira4_convert.py +--input "$mira_file" +--min_length $min_length +--min_cover $min_cover +--min_reads $min_reads +#if str($maf_wanted)=="true": +--maf "$out_maf" +#end if +#if str($fasta_wanted)=="true": +--fasta "$out_fasta" +#end if +#if str($bam_wanted)=="true": +--bam "$out_bam" +#end if +##Don't yet have a Galaxy datatype defined for ace: +## #if str($ace_wanted)=="true": +## --ace "$out_ace" +## #end if +#if str($cstats_wanted)=="true": +--cstats "$out_cstats" +#end if + </command> + <stdio> + <!-- Assume anything other than zero is an error --> + <exit_code range="1:" /> + <exit_code range=":-1" /> + </stdio> + <inputs> + <param name="mira_file" type="data" format="mira" required="true" label="MIRA Assembly Format input" /> + <!-- TODO - top level select for contig versus read output? Or two Galaxy tools in different XML files? --> + <param name="min_length" type="integer" required="false" value="0" min="0" + label="Minimum contig length" + help="e.g. Set to 1000 to exclude small contigs. Default is to keep all contigs (minimum zero)" /> + <param name="min_cover" type="integer" required="false" value="0" min="0" + label="Minimum average contig coverage" + help="e.g. Set to 10 to exclude low coverage contigs. Default is to keep all contigs (minimum zero)" /> + <param name="min_reads" type="integer" required="false" value="0" min="0" + label="Minimum reads per contig" + help="e.g. Set to 5 to exclude low coverage contigs with only a few reads. Default is to keep all contigs (minimum zero)." 
 /> + <param name="maf_wanted" type="boolean" label="Output assembly in MIRA's own format? (useful if filtering)" checked="False" /> + <param name="fasta_wanted" type="boolean" label="Convert assembly into (unpadded) FASTA?" checked="True" /> + <param name="bam_wanted" type="boolean" label="Convert assembly into (unpadded) BAM format?" checked="False" /> + <!-- Don't yet have a Galaxy datatype defined for ace: + <param name="ace_wanted" type="boolean" label="Convert assembly in ACE format?" checked="False" /> + --> + <param name="cstats_wanted" type="boolean" label="Assembly statistics file?" checked="False" /> + </inputs> + <outputs> + <data name="out_maf" format="mira" label="$mira_file.name (filtered)"> + <filter>maf_wanted is True</filter> + </data> + <data name="out_fasta" format="fasta" label="$mira_file.name (as FASTA)"> + <filter>fasta_wanted is True</filter> + </data> + <data name="out_bam" format="bam" label="$mira_file.name (as BAM)"> + <filter>bam_wanted is True</filter> + </data> + <!-- + <data name="out_ace" format="ace" label="$mira_file.name (as ACE)"> + <filter>ace_wanted is True</filter> + </data> + --> + <data name="out_cstats" format="tabular" label="$mira_file.name (filtered stats)"> + <filter>cstats_wanted is True</filter> + </data> + </outputs> + <tests> + <!-- TODO --> + </tests> + <help> +**What it does** + +Runs the ``miraconvert`` utility from MIRA v4.0 to filter and/or convert +a MIRA Assembly Format file produced by a *mapping* or *de novo* assembly. + +**Example Usage** + +You want to remove all the low coverage contigs from a transcriptome +assembly to focus on those with higher coverage. + +You want to convert your MIRA assembly into SAM/BAM to run a standard +SNP finding tool. + +You've lost the FASTA consensus from your MIRA assembly and need to +regenerate it. + + +**Citation** + +If you use this Galaxy tool in work leading to a scientific publication please +cite the following papers: + +Peter J.A. Cock, Björn A. 
Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). +Galaxy tools and workflows for sequence analysis with applications +in molecular plant pathology. PeerJ 1:e167 +http://dx.doi.org/10.7717/peerj.167 + +Bastien Chevreux, Thomas Wetter and Sándor Suhai (1999). +Genome Sequence Assembly Using Trace Signals and Additional Sequence Information. +Computer Science and Biology: Proceedings of the German Conference on Bioinformatics (GCB) 99, pp. 45-56. +http://www.bioinfo.de/isb/gcb99/talks/chevreux/main.html + +This wrapper is available to install into other Galaxy Instances via the Galaxy +Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/mira4_assembler + </help> +</tool> |
b |
diff -r 000000000000 -r 6a88b42ce6b9 tools/mira4/mira4_de_novo.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/mira4/mira4_de_novo.xml Fri Nov 21 06:42:56 2014 -0500 |
b |
b'@@ -0,0 +1,263 @@\n+<tool id="mira_4_0_de_novo" name="MIRA v4.0 de novo assember" version="0.0.4">\n+ <description>Takes Sanger, Roche 454, Solexa/Illumina, Ion Torrent and PacBio reads</description>\n+ <requirements>\n+ <requirement type="binary">mira</requirement>\n+ <requirement type="binary">miraconvert</requirement>\n+ <requirement type="package" version="4.0">MIRA</requirement>\n+ <requirement type="binary">samtools</requirement>\n+ <requirement type="package" version="0.1.19">samtools</requirement>\n+ </requirements>\n+ <version_command interpreter="python">mira4.py --version</version_command>\n+ <command interpreter="python">mira4.py\n+--manifest "$manifest"\n+#if str($maf_wanted)=="true":\n+--maf "$out_maf"\n+#end if\n+#if str($bam_wanted)=="true":\n+--bam "$out_bam"\n+#end if\n+--fasta "$out_fasta"\n+--log "$out_log"\n+ </command>\n+ <stdio>\n+ <!-- Assume anything other than zero is an error -->\n+ <exit_code range="1:" />\n+ <exit_code range=":-1" />\n+ </stdio>\n+ <inputs>\n+ <param name="job_type" type="select" label="Assembly type">\n+ <option value="genome">Genome</option>\n+ <option value="est">EST (transcriptome)</option>\n+ </param>\n+ <param name="job_quality" type="select" label="Assembly quality grade">\n+ <option value="accurate">Accurate</option>\n+ <option value="draft">Draft</option>\n+ </param>\n+ <repeat name="read_group" title="Read Group" min="1">\n+ <param name="technology" type="select" label="Read technology">\n+ <option value="solexa">Solexa/Illumina</option>\n+ <option value="sanger">Sanger cappillary sequencing</option>\n+ <option value="454">Roche 454</option>\n+ <option value="iontor">Ion Torrent</option>\n+ <option value="pcbiolq">PacBio low quality (raw)</option>\n+ <option value="pcbiohq">PacBio high quality (corrected)</option>\n+ <option value="text">Synthetic reads (database entries, consensus sequences, artifical reads, etc)</option>\n+ <!-- TODO reference/backbone as an entry here? 
-->\n+ </param>\n+ <conditional name="segments">\n+ <param name="type" type="select" label="Are these paired reads?">\n+ <option value="paired">Paired reads</option>\n+ <option value="none">Single reads or not relevant (e.g. primer walking with Sanger capillary sequencing)</option>\n+ </param>\n+ <when value="paired">\n+ <param name="placement" type="select" label="Pairing type (segment placing)">\n+ <option value="FR">---> <--- (e.g. Sanger capillary or Solexa/Illumina paired-end library)</option>\n+ <option value="RF"><--- ---> (e.g. Solexa/Illumina mate-pair library)</option>\n+ <option value="SB">2---> 1---> (e.g. Roche 454 paired-end libraries or IonTorrent long-mate; see note)</option>\n+ </param>\n+ <!-- min/max validation is done via the <code> tag -->\n+ <param name="min_size" type="integer" optional="true" min="0" value=""\n+ label="Minimum size of \'good\' DNA templates in the library preparation"\n+ help="Optional, but if used you must also supply a maximum value." /> \n+ <param name="max_size" type="integer" optional="true" min="0" value=""\n+ label="Maximum size of \'good\' DNA templates in the library preparation"\n+ help="Optional, but if used you must also supply a minimum value." />\n+ <param name="naming" type="select" label="Pair naming convention">\n+ <option value="solexa">Solexa/Illumina (using \'/1\' and \'/2\' suffixes, or later Illumina c'..b'ger" />\n+ <param name="maf_wanted" value="true"/>\n+ <param name="bam_wanted" value="true"/>\n+ <output name="out_fasta" file="U13small_m.mira4_de_novo.fasta" ftype="fasta" />\n+ <output name="out_bam" file="empty_file.dat" compare="contains" />\n+ <!-- TODO: Suggest startswith as a compare method? 
-->\n+ <output name="out_maf" file="header.mira" compare="contains" />\n+ <output name="out_log" file="empty_file.dat" compare="contains" />\n+ </test>\n+ <!-- Simple assembly based on MIRA\'s minidemo/solexa1 example\n+ Note we\'re using just one repeat group,\n+ but two parameters within the repeat (filename, no pairing)\n+ -->\n+ <test>\n+ <param name="job_type" value="genome" />\n+ <param name="job_quality" value="accurate" />\n+ <param name="type" value="none" />\n+ <param name="filenames" value="ecoli.fastq" ftype="fastqsanger" />\n+ <param name="maf_wanted" value="false"/>\n+ <param name="bam_wanted" value="false"/>\n+ <output name="out_fasta" file="ecoli.mira4_de_novo.fasta" ftype="fasta" />\n+ <output name="out_log" file="empty_file.dat" compare="contains" />\n+ </test>\n+ </tests>\n+ <help>\n+\n+**What it does**\n+\n+Runs MIRA v4.0 in de novo mode, collects the output, generates a sorted BAM\n+file, and then throws away all the temporary files.\n+\n+MIRA is an open source assembly tool capable of handling sequence data from\n+a range of platforms (Sanger capillary, Solexa/Illumina, Roche 454, Ion Torrent\n+and also PacBio).\n+\n+It is particularly suited to small genomes such as bacteria.\n+\n+\n+**Notes on paired reads**\n+\n+.. class:: warningmark\n+\n+MIRA uses read naming conventions to identify paired read partners\n+(and does not care about their order in the input files). In most cases,\n+the Solexa/Illumina setting is fine. For Sanger capillary sequencing,\n+you may need to rename your reads to match one of the standard conventions\n+supported by MIRA. 
For Roche 454 or Ion Torrent the appropriate settings\n+depend on how the FASTQ file was produced:\n+\n+* If using Roche\'s ``sffinfo`` or older versions of ``sff_extract``\n+ to convert SFF files to FASTQ, your reads will probably have the\n+ ``---> <---`` orientation and use the ``.f`` and ``.r``\n+ suffixes (FR naming).\n+\n+* If using a recent version of ``sff_extract``, then the ``/1`` and ``/2``\n+ suffixes are used (Solexa/Illumina style naming) and the original\n+ ``2---> 1--->`` orientation is preserved.\n+\n+The reason for this is the raw data for Roche 454 and Ion Torrent paired-end\n+libraries sequences a circularised fragment such that the raw data begins\n+with the end of the fragment, a linker, then the start of the fragment.\n+This means both the start and end are sequenced from the same strand, and\n+have the orientation ``2---> 1--->``. However, in order to use the data\n+with traditional tools expecting Sanger capillary style ``---> <---``\n+orientation it was common to reverse complement one of the pair to mimic this.\n+\n+\n+**Citation**\n+\n+If you use this Galaxy tool in work leading to a scientific publication please\n+cite the following papers:\n+\n+Peter J.A. Cock, Bj\xc3\xb6rn A. Gr\xc3\xbcning, Konrad Paszkiewicz and Leighton Pritchard (2013).\n+Galaxy tools and workflows for sequence analysis with applications\n+in molecular plant pathology. PeerJ 1:e167\n+http://dx.doi.org/10.7717/peerj.167\n+\n+Bastien Chevreux, Thomas Wetter and S\xc3\xa1ndor Suhai (1999).\n+Genome Sequence Assembly Using Trace Signals and Additional Sequence Information.\n+Computer Science and Biology: Proceedings of the German Conference on Bioinformatics (GCB) 99, pp. 45-56.\n+http://www.bioinfo.de/isb/gcb99/talks/chevreux/main.html\n+\n+This wrapper is available to install into other Galaxy Instances via the Galaxy\n+Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/mira4_assembler\n+ </help>\n+</tool>\n' |
b |
diff -r 000000000000 -r 6a88b42ce6b9 tools/mira4/mira4_make_bam.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/mira4/mira4_make_bam.py Fri Nov 21 06:42:56 2014 -0500 |
[ |
#!/usr/bin/env python
"""Wrapper script using miraconvert & samtools to get BAM from MIRA.

Usage: mira4_make_bam.py MIRACONVERT_BINARY MAF_FILE PADDED_FASTA OUTPUT_BAM

The MAF assembly is converted to SAM with miraconvert, then depadded, sorted
and indexed with samtools. Progress and captured tool output go to stdout.
"""
import os
import sys
import shutil
import subprocess
import tempfile

# Captured tool output longer than this many characters is abbreviated in the log.
MAX_LOG_CHARS = 10000
# Number of lines kept from the start and from the end of abbreviated output.
LOG_CONTEXT_LINES = 10


def stop_err(msg, err=1):
    """Write msg to stderr and exit the process with status err."""
    sys.stderr.write(msg+"\n")
    sys.exit(err)


def _abbreviate(text):
    """Return text unchanged, or only its first and last lines if it is very long."""
    if len(text) <= MAX_LOG_CHARS:
        return text
    lines = text.split("\n")
    if len(lines) <= 2 * LOG_CONTEXT_LINES:
        # Few (but long) lines: head and tail slices would overlap and
        # duplicate lines, so keep the output as it is.
        return text
    snipped = lines[:LOG_CONTEXT_LINES] + ["...", "<snip>", "..."] + lines[-LOG_CONTEXT_LINES:]
    return "\n".join(snipped)


def run(cmd, log_handle):
    """Run cmd through the shell, log its combined output, return its exit code.

    cmd is a shell command line (string). stderr is merged into stdout and the
    (possibly abbreviated) output is written to log_handle. If the command
    cannot be launched at all, the error is reported on stderr and in the log
    and the process exits with status 1.
    """
    try:
        # universal_newlines gives text (not bytes) output on Python 2 and 3,
        # which is what the line splitting and the log handle expect.
        child = subprocess.Popen(cmd, shell=True,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT,
                                 universal_newlines=True)
    except Exception as err:
        msg = "Error invoking command:\n%s\n\n%s\n" % (cmd, err)
        sys.stderr.write(msg)
        log_handle.write(msg)
        sys.exit(1)
    #Use .communicate as can get deadlocks with .wait(),
    stdout, stderr = child.communicate()
    assert not stderr #Should be empty as sent to stdout
    #miraconvert can be very verbose (is holding stdout in RAM a problem?)
    log_handle.write(_abbreviate(stdout))
    return child.returncode


def depad(fasta_file, sam_file, bam_file, log_handle):
    """Depad, sort and index sam_file, leaving the sorted BAM at bam_file.

    fasta_file is the padded reference used by "samtools depad". Returns an
    error message string on failure, or None on success. Intermediate files
    next to bam_file are removed in both cases.
    """
    log_handle.write("\n================= Converting MIRA assembly from SAM to BAM ===================\n")
    #Also doing SAM to (uncompressed) BAM during depad
    bam_stem = bam_file + ".tmp" # Have write permissions and want final file in this folder
    tmp_bam = bam_stem + ".bam"
    tmp_bai = tmp_bam + ".bai"
    try:
        cmd = 'samtools depad -S -u -T "%s" "%s" | samtools sort - "%s"' % (fasta_file, sam_file, bam_stem)
        return_code = run(cmd, log_handle)
        if return_code:
            return "Error %i from command:\n%s" % (return_code, cmd)
        if not os.path.isfile(tmp_bam):
            return "samtools depad or sort failed to produce BAM file"

        log_handle.write("\n====================== Indexing MIRA assembly BAM file =======================\n")
        cmd = 'samtools index "%s.bam"' % bam_stem
        return_code = run(cmd, log_handle)
        if return_code:
            return "Error %i from command:\n%s" % (return_code, cmd)
        if not os.path.isfile(tmp_bai):
            return "samtools indexing of BAM file failed to produce BAI file"

        shutil.move(tmp_bam, bam_file)
        return None
    finally:
        # The index is always discarded (let Galaxy handle that...), and a
        # partial BAM is removed if any step above failed.
        for leftover in (tmp_bam, tmp_bai):
            if os.path.isfile(leftover):
                os.remove(leftover)


def make_bam(mira_convert, maf_file, fasta_file, bam_file, log_handle):
    """Convert a MIRA MAF assembly into a sorted BAM file.

    mira_convert is the path to the miraconvert binary, maf_file the MIRA
    assembly, fasta_file the padded FASTA and bam_file the output path.
    Returns an error message string on failure, or None on success. The
    temporary working directory is removed in both cases.
    """
    if not os.path.isfile(mira_convert):
        return "Missing binary %r" % mira_convert
    if not os.path.isfile(maf_file):
        return "Missing input MIRA file: %r" % maf_file
    if not os.path.isfile(fasta_file):
        return "Missing padded FASTA file: %r" % fasta_file

    log_handle.write("\n====================== Converting MIRA assembly to SAM =======================\n")
    tmp_dir = tempfile.mkdtemp()
    try:
        sam_file = os.path.join(tmp_dir, "x.sam")

        # Note add nbb to the template name, possible MIRA 4.0 RC4 bug
        cmd = '"%s" -f maf -t samnbb "%s" "%snbb"' % (mira_convert, maf_file, sam_file)
        return_code = run(cmd, log_handle)
        if return_code:
            return "Error %i from command:\n%s" % (return_code, cmd)
        if not os.path.isfile(sam_file):
            return "Conversion from MIRA to SAM failed"

        #Also doing SAM to (uncompressed) BAM during depad
        msg = depad(fasta_file, sam_file, bam_file, log_handle)
        if msg:
            return msg
        return None #Good :)
    finally:
        # Remove the SAM file and anything else left in the working directory,
        # on success and on every error path.
        shutil.rmtree(tmp_dir, ignore_errors=True)


if __name__ == "__main__":
    if len(sys.argv) != 5:
        stop_err("Expected four arguments: miraconvert binary, MAF file, padded FASTA file, output BAM file")
    mira_convert, maf_file, fasta_file, bam_file = sys.argv[1:]
    msg = make_bam(mira_convert, maf_file, fasta_file, bam_file, sys.stdout)
    if msg:
        stop_err(msg)
b |
diff -r 000000000000 -r 6a88b42ce6b9 tools/mira4/mira4_mapping.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/mira4/mira4_mapping.xml Fri Nov 21 06:42:56 2014 -0500 |
b |
b'@@ -0,0 +1,267 @@\n+<tool id="mira_4_0_mapping" name="MIRA v4.0 mapping" version="0.0.4">\n+ <description>Maps Sanger, Roche 454, Solexa/Illumina, Ion Torrent and PacBio reads</description>\n+ <requirements>\n+ <requirement type="binary">mira</requirement>\n+ <requirement type="binary">miraconvert</requirement>\n+ <requirement type="package" version="4.0">MIRA</requirement>\n+ <requirement type="binary">samtools</requirement>\n+ <requirement type="package" version="0.1.19">samtools</requirement>\n+ </requirements>\n+ <version_command interpreter="python">mira4.py --version</version_command>\n+ <command interpreter="python">mira4.py\n+--manifest "$manifest"\n+#if str($maf_wanted) == "true":\n+--maf "$out_maf"\n+#end if\n+#if str($bam_wanted) == "true":\n+--bam "$out_bam"\n+#end if\n+--fasta "$out_fasta"\n+--log "$out_log"\n+ </command>\n+ <stdio>\n+ <!-- Assume anything other than zero is an error -->\n+ <exit_code range="1:" />\n+ <exit_code range=":-1" />\n+ </stdio>\n+ <inputs>\n+ <param name="job_type" type="select" label="Assembly type">\n+ <option value="genome">Genome</option>\n+ <option value="est">EST (transcriptome)</option>\n+ </param>\n+ <param name="job_quality" type="select" label="Assembly quality grade">\n+ <option value="accurate">Accurate</option>\n+ <option value="draft">Draft</option>\n+ </param>\n+ <!-- TODO? Allow technology type for references? -->\n+ <!-- TODO? Allow strain settings for reference(s) and reads? -->\n+ <!-- TODO? Use a repeat to allow for multi-strain references? -->\n+ <!-- TODO? Add strain to the mapping read groups? -->\n+ <param name="references" type="data" format="fasta,fastq,mira" multiple="true" required="true" label="Backbone reference file(s)"\n+ help="Multiple files allowed, for example one FASTA file per chromosome or plasmid." 
/>\n+ <param name="strain_setup" type="select" label="Strain configuration (reference vs reads)">\n+ <option value="default">Different strains - mapping reads onto a related reference (\'StrainX\' vs \'ReferenceStrain\')</option>\n+ <option value="same">Same strain - mapping reads from same reference (all \'StrainX\')</option>\n+ </param>\n+ <repeat name="read_group" title="Read Group" min="1">\n+ <param name="technology" type="select" label="Read technology">\n+ <option value="solexa">Solexa/Illumina</option>\n+ <option value="sanger">Sanger capillary sequencing</option>\n+ <option value="454">Roche 454</option>\n+ <option value="iontor">Ion Torrent</option>\n+ <option value="pcbiolq">PacBio low quality (raw)</option>\n+ <option value="pcbiohq">PacBio high quality (corrected)</option>\n+ <option value="text">Synthetic reads (database entries, consensus sequences, artificial reads, etc)</option>\n+ </param>\n+ <conditional name="segments">\n+ <param name="type" type="select" label="Are these paired reads?">\n+ <option value="paired">Paired reads</option>\n+ <option value="none">Single reads or not relevant (e.g. primer walking with Sanger capillary sequencing)</option>\n+ </param>\n+ <when value="paired">\n+ <param name="placement" type="select" label="Pairing type (segment placing)">\n+ <option value="FR">---> <--- (e.g. Sanger capillary or Solexa/Illumina paired-end library)</option>\n+ <option value="RF"><--- ---> (e.g. Solexa/Illumina mate-pair library)</option>\n+ <option value="SB">2---> 1---> (e.g. 
Roche 454 paired-end libraries or IonTorrent long-mate; see note)</option>\n+ </param>\n+ <param name="naming" type="sele'..b'none" />\n+ <param name="filenames" value="tvc_mini.fastq" ftype="fastqsanger" />\n+ <param name="maf_wanted" value="true"/>\n+ <param name="bam_wanted" value="true"/>\n+ <output name="out_fasta" file="tvc_map_ref_strain.fasta" ftype="fasta" />\n+ <output name="out_bam" file="empty_file.dat" compare="contains" />\n+ <!-- TODO: Suggest startswith as a compare method? -->\n+ <output name="out_maf" file="header.mira" compare="contains" />\n+ <output name="out_log" file="empty_file.dat" compare="contains" />\n+ </test>\n+ <test>\n+ <param name="job_type" value="genome" />\n+ <param name="job_quality" value="accurate" />\n+ <param name="references" value="tvc_contigs.fasta" ftype="fasta" />\n+ <param name="strain_setup" value="same" />\n+ <param name="type" value="none" />\n+ <param name="filenames" value="tvc_mini.fastq" ftype="fastqsanger" />\n+ <param name="maf_wanted" value="false"/>\n+ <param name="bam_wanted" value="false"/>\n+ <output name="out_fasta" file="tvc_map_same_strain.fasta" ftype="fasta" />\n+ <output name="out_log" file="empty_file.dat" compare="contains" />\n+ </test>\n+ </tests>\n+ <help>\n+\n+**What it does**\n+\n+Runs MIRA v4.0 in mapping mode, collects the output, generates a sorted BAM\n+file, and throws away all the temporary files.\n+\n+MIRA is an open source assembly tool capable of handling sequence data from\n+a range of platforms (Sanger capillary, Solexa/Illumina, Roche 454, Ion Torrent\n+and also PacBio).\n+\n+It is particularly suited to small genomes such as bacteria.\n+\n+\n+**Notes on paired reads**\n+\n+.. class:: warningmark\n+\n+MIRA uses read naming conventions to identify paired read partners\n+(and does not care about their order in the input files). In most cases,\n+the Solexa/Illumina setting is fine. 
For Sanger capillary sequencing,\n+you may need to rename your reads to match one of the standard conventions\n+supported by MIRA. For Roche 454 or Ion Torrent the appropriate settings\n+depend on how the FASTQ file was produced:\n+\n+* If using Roche\'s ``sffinfo`` or older versions of ``sff_extract``\n+ to convert SFF files to FASTQ, your reads will probably have the\n+ ``---> <---`` orientation and use the ``.f`` and ``.r``\n+ suffixes (FR naming).\n+\n+* If using a recent version of ``sff_extract``, then the ``/1`` and ``/2``\n+ suffixes are used (Solexa/Illumina style naming) and the original\n+ ``2---> 1--->`` orientation is preserved.\n+\n+The reason for this is the raw data for Roche 454 and Ion Torrent paired-end\n+libraries sequences a circularised fragment such that the raw data begins\n+with the end of the fragment, a linker, then the start of the fragment.\n+This means both the start and end are sequenced from the same strand, and\n+have the orientation ``2---> 1--->``. However, in order to use the data\n+with traditional tools expecting Sanger capillary style ``---> <---``\n+orientation it was common to reverse complement one of the pair to mimic this.\n+\n+\n+**Citation**\n+\n+If you use this Galaxy tool in work leading to a scientific publication please\n+cite the following papers:\n+\n+Peter J.A. Cock, Bj\xc3\xb6rn A. Gr\xc3\xbcning, Konrad Paszkiewicz and Leighton Pritchard (2013).\n+Galaxy tools and workflows for sequence analysis with applications\n+in molecular plant pathology. PeerJ 1:e167\n+http://dx.doi.org/10.7717/peerj.167\n+\n+Bastien Chevreux, Thomas Wetter and S\xc3\xa1ndor Suhai (1999).\n+Genome Sequence Assembly Using Trace Signals and Additional Sequence Information.\n+Computer Science and Biology: Proceedings of the German Conference on Bioinformatics (GCB) 99, pp. 
45-56.\n+http://www.bioinfo.de/isb/gcb99/talks/chevreux/main.html\n+\n+This wrapper is available to install into other Galaxy Instances via the Galaxy\n+Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/mira4_assembler\n+ </help>\n+</tool>\n' |
b |
diff -r 000000000000 -r 6a88b42ce6b9 tools/mira4/mira4_validator.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/mira4/mira4_validator.py Fri Nov 21 06:42:56 2014 -0500 |
[ |
#Called from the Galaxy Tool XML file
#import sys

def _parse_size(text, label):
    """Parse one non-empty size field.

    Returns (value, None) for a non-negative integer, otherwise
    (None, error_message). label is "Minimum" or "Maximum" and is used
    as the first word of the error message.
    """
    try:
        value = int(text)
    except ValueError:
        return None, "%s size is not an integer (%s)" % (label, text)
    if value < 0:
        return None, "%s size must not be negative (%i)" % (label, value)
    return value, None


def _check_size_range(segments):
    """Return a dict of field name -> error message for one paired read group."""
    err = dict()
    min_size = str(segments["min_size"]).strip()
    max_size = str(segments["max_size"]).strip()

    #Somehow Galaxy seems to turn an empty field into string "None"...
    if min_size == "None":
        min_size = ""
    if max_size == "None":
        max_size = ""

    if min_size == "" and max_size == "":
        #Both missing is good
        return err
    if min_size == "":
        err["min_size"] = "Minimum size required if maximum size given"
    elif max_size == "":
        err["max_size"] = "Maximum size required if minimum size given"

    # A value stays None when the field is empty or invalid, which also
    # skips the min/max comparison below.
    min_size_int = max_size_int = None
    if min_size:
        min_size_int, problem = _parse_size(min_size, "Minimum")
        if problem:
            err["min_size"] = problem
    if max_size:
        max_size_int, problem = _parse_size(max_size, "Maximum")
        if problem:
            err["max_size"] = problem

    if min_size_int is not None and max_size_int is not None and min_size_int > max_size_int:
        msg = "Minimum size must be less than maximum size (%i vs %i)" % (min_size_int, max_size_int)
        err["min_size"] = msg
        err["max_size"] = msg
    return err


def validate_input(trans, error_map, param_values, page_param_map):
    """Validates the min_size/max_size user input, before execution.

    Each entry of param_values["read_group"] is checked. For paired read
    groups the optional minimum/maximum sizes must be given together, be
    non-negative integers, and the minimum must not exceed the maximum.
    If any read group has a problem, error_map["read_group"] is set to a
    list with one entry per read group: an empty dict for valid groups and
    {"segments": {field: message}} for invalid ones. trans and
    page_param_map are not used.
    """
    err_list = []
    for read_group in param_values["read_group"]:
        segments = read_group["segments"]
        if str(segments["type"]) != "paired":
            # Size limits only apply to paired reads
            err_list.append(dict())
            continue
        err = _check_size_range(segments)
        if err:
            err_list.append({"segments":err})
        else:
            err_list.append(dict())

    if any(err_list):
        #Return an error map only if any readgroup gave errors
        error_map["read_group"] = err_list
b |
diff -r 000000000000 -r 6a88b42ce6b9 tools/mira4/repository_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/mira4/repository_dependencies.xml Fri Nov 21 06:42:56 2014 -0500 |
b |
@@ -0,0 +1,4 @@ +<?xml version="1.0"?> +<repositories description="This requires the MIRA datatype definitions (e.g. the MIRA Assembly Format)."> + <repository changeset_revision="ddd2e3362c5e" name="mira_datatypes" owner="peterjc" toolshed="https://toolshed.g2.bx.psu.edu" /> +</repositories> |
b |
diff -r 000000000000 -r 6a88b42ce6b9 tools/mira4/tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/mira4/tool_dependencies.xml Fri Nov 21 06:42:56 2014 -0500 |
b |
@@ -0,0 +1,55 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="samtools" version="0.1.19"> + <repository changeset_revision="923adc89c666" name="package_samtools_0_1_19" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> + <package name="MIRA" version="4.0"> + <install version="1.0"> + <actions_group> + <!-- Download the binaries for MIRA compatible with 64-bit OSX. --> + <actions architecture="x86_64" os="darwin"> + <action type="download_by_url">http://downloads.sourceforge.net/project/mira-assembler/MIRA/stable/mira_4.0.2_darwin13.1.0_x86_64_static.tar.bz2</action> + <action type="move_directory_files"> + <source_directory>bin</source_directory> + <destination_directory>$INSTALL_DIR</destination_directory> + </action> + </actions> + <!-- Download the binaries for MIRA compatible with 64-bit Linux. --> + <actions architecture="x86_64" os="linux"> + <action type="download_by_url">http://downloads.sourceforge.net/project/mira-assembler/MIRA/stable/mira_4.0.2_linux-gnu_x86_64_static.tar.bz2</action> + <action type="move_directory_files"> + <source_directory>bin</source_directory> + <destination_directory>$INSTALL_DIR</destination_directory> + </action> + </actions> + <!-- This actions tag is only processed if none of the above tags resulted in a successful installation. 
--> + <actions> + <action type="shell_command">echo "ERROR: Automated installation on your operating system and CPU architecture combination is not yet supported."</action> + <action type="shell_command">echo "Your machine details (the output from 'uname' and 'arch'):"</action> + <action type="shell_command">uname</action> + <action type="shell_command">arch</action> + <action type="shell_command">echo "If pre-compiled MIRA binaries are now available for this, please report this"</action> + <action type="shell_command">echo "via https://github.com/peterjc/pico_galaxy/issues - thank you!"</action> + <action type="shell_command">false</action> + <!-- The 'false' command will return an error, so Galaxy should treat this as a failed install --> + </actions> + <!-- The $PATH environment variable is only set if one of the above <actions> tags resulted in a successful installation. --> + <action type="set_environment"> + <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR</environment_variable> + </action> + <action type="set_environment"> + <environment_variable action="set_to" name="MIRA4">$INSTALL_DIR</environment_variable> + </action> + </actions_group> + </install> + <readme> +Downloads MIRA v4.0.2 from Sourceforge, requesting Bastien's precompiled binaries +for 64 bit (x86_64) Linux or Mac OS X. Other platforms where compilation from +source would be required (e.g. 32 bit Linux) are not supported by this automated +installation script. + +http://chevreux.org/projects_mira.html +http://sourceforge.net/projects/mira-assembler/ + </readme> + </package> +</tool_dependency> |