Previous changeset 4:1c36cf8ef133 (2014-07-30) Next changeset 6:03e134cae41a (2016-05-17) |
Commit message:
v0.2.2; New options for IDs via text parameter, ignore paired read suffix; misc changes |
modified:
tools/seq_filter_by_id/README.rst tools/seq_filter_by_id/seq_filter_by_id.py tools/seq_filter_by_id/seq_filter_by_id.xml tools/seq_filter_by_id/tool_dependencies.xml |
added:
test-data/empty_file.dat test-data/k12_hypothetical_alt.tabular test-data/sanger-pairs-mixed.fastq test-data/sanger-pairs-names.tabular test-data/sanger-sample.fastq |
b |
diff -r 1c36cf8ef133 -r 832c1fd57852 test-data/k12_hypothetical_alt.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/k12_hypothetical_alt.tabular Wed May 13 11:03:57 2015 -0400 |
[ |
@@ -0,0 +1,2 @@ +#ID and Description Length +gi|16127999|ref|NP_414546.1| hypothetical protein b0005 [Escherichia coli str. K-12 substr. MG1655] 98 |
b |
diff -r 1c36cf8ef133 -r 832c1fd57852 test-data/sanger-pairs-mixed.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sanger-pairs-mixed.fastq Wed May 13 11:03:57 2015 -0400 |
b |
b"@@ -0,0 +1,800 @@\n+@WTSI_1055_1a03.p1kpIBF bases 1 to 312\n+TTGTTGAACAGCAAAAAGGTCAAGAATATGGATGTTCTCGCCATGATTTTTGTGCCATAGGCGCGCATTCACAAGGTCCATCAGTCGNTCAGCCTGCCGCAACACCACCACCAGCCGCAGCAACAACAACAGCACCAGCAGCAGCTGATCCAATCGCATGTGCCACAGAATAACACCCAAAATCAATTAGCGACGGCCGCCCTCCAGCCGGTTCAGCAGCAGAAACAGCACGAAAAATGGGATCCGATCAAAGAATTTGGGCTGCAAAAGGACGAAATGGCGTTGAAGTCACCGCCCAGCAATGTTTGTGT\n++\n+!96CBHOOTTTYYYQMK???OOTYTTTNNNYYYYNIIIFFIIIIIIIYOOOMAA62.((((*,9@MIIIIO?A3007OOOMMII::%%%::AEHIIIQYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYTOOKKKKKYMMYYYKIINNNTYYNIIIINYYYYTOLKKKOOKKKKOLTTYYYYSSSSYYYYSSSSSSMMSOOTLLLONIDDDNOTTYQQMMMMPBB9>BDOOTTQMMMMQMMMQQE:666QQYYPMMDDDADDM@B<FDBBDKKKKKKKKIGKINIFFFKDGGIDB?2/\n+@WTSI_1055_1a04.p1kpIBF bases 1 to 186\n+TTACCCGTCGGCGCCGAAAGAGCCGAAGGCTTTGTGACTGAGGCCGGACACTGTGCTGTTAAGCTGGACATTGCCCGACCTGTCGAGTGCGCCGCTCGCCGAAATTCGTTATCGCGTAAATTTATTTATTTATTTTTATTTTTTTAAATAAAAATGACGACTAATTTGTAAGGGCATAACAACAA\n++\n+!,,,./644,,,-0377<:Q777<BB<<60,+.,+,.4.,))))//15>>550007:66>>==7@71/--0:<CDBB;;49/***/***22,/+)))11===798:3.,,1488?133??BKKMODFB?BDB7447B?:8--.E:F?B77?BKKC<<322B:..<41,46>>B<<::::5116..\n+@WTSI_1055_1a04.q1kpIBR bases 1 to 359\n+TGATTACGCCAAGCTATTTAGGTGAGACTATAGAATACTCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCAGGGTACCCGACGTCCGATATCGCGAAAAATGATGTATCTAGATTTGTCAGGAAACGTCCCCGAGTCTGTTCGACAAACAAACGTTATTCCGAACTCCCAACAACAGTATTTGATTGTGTAAAAATCTCTTGGCCTGATTACTATACTTTAGACATTTTTAGTGCCTGTATTGGAGGTATTTTAGGAACTTTTGGAACGAGCTTTTATCGATTTAGGGAACTAAAAAACCGTTCCATATTCATTAGATGCTATTATTTAAAATCCGAGTCTGATTTGCGAT\n++\n+!41>;D>AA>;;=;;>>AA@@CDDAA>>>ADINIIHHDD>::79:>>FIICCCHHHHCCCCCCCCCHHHHIEA>9..''))**,,++''+)**.,,,-,00..0B+..33010701+++-1B1.,??KMOYYQQQQ<<61,))01<:CAIIIIIYYYYTYTTTTYYYYYTTTTNNKKKKYYYYYYYYYYYYPMMOKTTTTYTTTTTYNINNINTNTIIIIIIIIINNYYYYYYYTTOLKKKIIIINNNOKKKKKFFKKYYYYYYYYYYSSMMMQMYYYYYTTTTLLPIDDDDDDFFFFFFMMKKLNIDFFKQQMMMMMMMMHHFF>A>>:779=5<488>>7745/00::300+++0-\n+@WTSI_1055_1a05.p1kpIBF bases 1 to 642\n+CGTGCCAGTTCTAAACTGGTCGTTCAGCGCCAACCGAAGTGCATACCCTGACGAGCATACACGCAGCTGAAGCGCTCCACAAGCAGCTCTCACCACTAGTCCACGCACCACCCCGCAAGGAGACGGCACGCAGCCACGGGCAAAAGCCGCCTGTTTCACACAACAGCCCGGCTGACCCGACCTTTAGAGCCAATTCTTTTCCCGAAGTTACGAATCTAATTTGCCGACTTCCCTTACCTACATTATTCTATCGACTAGAGGCTGTTCACCTTGGAGACCTGCTGCGGATATCGGTACGATCAGGCAGGAGATTCATATCGCTTCCCTCGCATTTTCAAGGGCCGTGTGGAGCGCACGAGACACCACAGGAACCGCGGTGCTTTACGGGCGCAACATCCCTATCTCAGGCTGAGCCACTTCCAGGCACGCACGCCCTAAACCAGAAAAGAGAACTCTGGCTCGGACTCCACACGACGTCTGCGAGTTCATTTGCGTTACCGCGCGAAACAGTTCTTGCGAACCGTCATTTCCCTGGCCTGGCGTGGGAATGTTAACCCACTTCCCTTTCGGCAACCGGATGGACAAACTGCGCAAGCACAGCAAAGTCTTCATCCGTAGTGTGTGACGGCATTAGCCGGTGC\n++\n+!<>AIHHCCCCCCCCIIIINNNNNTTTYYYYYYYYYYTTTTIIIIHHNIIIFDKFDDINNNTTTNIIIIINTTTTTTTYYYYYYTNNNNNTTYNIIIIIINNYYYYYYYYYYYYYYYYYTNNNNNTTTTTTYYYYYYYYYYYYYYYYYTLLJJJNNTTTTYYYYYYYYYTNNJNJLLTYYYYTONJJJOOYYYYYYYYYYYYYTTTTLOJJJJOOYYYYYYYYYTTTTTTYYYTTTTTTYYYYYYYYYYYYYYYYLJJJJJTYYYTLLLTOTJJJJJKKOYYYYTJNJJJOOTOOIIIILKYYYYTINDDDEEOSYYYYYYYYYYYYYYYYYYYYYYTTLTTTTTTTINIIIOYTKB888>>KMYYIIFIIITKYYYYKKKTOTYYYYYYYYYYYYYYYYYYYKIDDDD>>444>BKLKIIGGDIOYYYYIYYYQIIII@@7507>43--/<<IAAIIII>559==A@IIB>>===KMQM??/33?BIIQQIIFCCFCCFIIICIHA?@F>:>:>>=3...08AIIIMIQQQQCCCCQC:>=:6:>:>>IICA>>>>IFCCC>:>AA>99>;>AACAA>>>::7;7AIII>>>:>>IAI>833688949>@C>:>A;98777=;>99::>4755057132+\n+@WTSI_1055_1a05.q1kpIBR bases 1 to 219\n+CTGTGTACAAAGGGCAGGGACGTATTCAGAGCGAGTTGATGACTCGCCCCTACAAGGAATTCCTCGTTCACGGACAATAATTGCAATGTCCGATCCCAATCACGGCAAATTTTCACCGGTTTACCAACCCCTTTCGGGGAAGGACAAGCACGCTGATTTTGCCAGTGTAGCGCGCGTGCAGCCCCGGACATCTAAGGGCATCACAGACCTGTTATTGC\n++\n+!>>>>>DDIFKOOTTTNDDDHHFTTOOKKKYYTTNNNIYYNNNNNNYTIIIIITIFNIDDKKKNNIIIFIITTTTNNNNNINIINGIKMYYYYYOTTTTTYKKLMMMYYYQOOAAAAIQ;7:<<<A>=AAQA>><<<>7::77::7>>IIIAAAA>:>A=>>5:88::=BIIIIIIIII>>7;9733999=8370---128999::14.,0,,0442+\n+@WTSI_1055_1a07.p1kpIBF bases 1 to 574\n+AACGACGGCCAGTGAATTGTAATACGACTCACTATAGGGCGATTTCGAGCTCGGTACCCGGGGATCCCACCGGTACGGAGGGAAATTTGATCATCGCGGAAGTGCTCGTTTTGATTATCTTGGTGTATGGCGTCTGTGACCTTCTTTTTCGCTGGATGGGCATCGGGGCGTACGCCTGGGGTTCGCGCTCGAGCCCCAAAATCGCCCTCACTTTCGATGACGGGCCCAGCGAACACACCCGGTCCTTGCTCGAGCTGCTGCACCGCCATGGGGTAAAAG"..b'TGATCAATCGTTGCACTCAGTGCTTTTTGATCGCCATTTTCTCCACGTCAGATTTAACCAGTCAATTTTGTCATTGGCTTCCTTTCAATGCGGTTGCTGCTTCAAAATCATCTCTTCCATTAAATTCGGGTAACGAGCCCAATGTTCTTGATGCTTCAACGAAAACTGATCAGGCGAACTGAAAGGGTGTAAAAAAGATAAAAGAAATTGTAAACGCAGCACATTGTCAAGCAAAGCAACCCAAAAAAATCGATTTTGAGTATAGTCAAAAAGGGTTACCCGTCAATGATGATCTGTTGCTGTTTGTTTGATACTCCTCCTTTCAATTTGCGATTGTTGTTGTTGCAATTGGCACGCGAA\n++\n+!88BHIQQQYYYITTTTIIINNIIIIKKKYYYYIIIIFFYOMTTTYYIIIIAA99//.1<BKKOOTYYYYTTTTNNTTINNNTTYTTNNNIIITTYTTTTTTTTYYYYYIIIIIOYYYYYYYYYYYTTTTTTNNNNTTYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYTTOTLLYYYYYYYYYTTTTTTTTTTTTTTTTYYYYYYYYYYTTTTTTYYTNNNNNTYYYYYYTTTTTTYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYOKKKOOYYYYKK???KQMMMPPPPQMMKKKMPYYYKKKKKKKKKKMMYYYYYYYYYYYYYYYYYYYYYYYYYYYYYQQQQQI51)%%)4<QQQQQQYYYYTTKTTTTTTTYYYYYYYNNNNNNYYYKKKKGGNNNNYYYYYYYYYYYQMMMMQOKKGIIKKKKYQYYYYYYYYTOOLKKIIIIIOYQQQQQQBA>:;AABAACCCIIIOIIBBIIIII:77<><AAIIIOQQIE=>>>CA>AAABBIIIIIII:00882389667>BAAA?A>77:<844>A?;4++0966.+4492000--4922./..++\n+@WTSI_1055_1g01.p1kpIBF bases 1 to 584\n+CAAATCCTACTGGCCGGACAAAAGAAGCGGCCAAACAACGTGCTCTTCACAAGACGATCACCACCAAAAACATTCACACATGCTCAACGAGACATTGCTTGCAGGATGGCAAGTGCAGGAAGCACTTTCCGGTGCATTAGTTTACACTGACTATGTAACCTATTGTTAATTCCCTGTAGAAACCGTTTGAGTACGACACTGTGTACTCTGAAAATGCCTACCCTCGCTACAAGCGCCGCCCACCTCCGCCTTCACTCCAAGAAGCCCAGCAGAGTCCGGAATTATACGGGCGCGAAATGCAATACAAGGACCAGCGTGGCAAACTAATTCGCAAGGACAACTCTCACGTCGTGGCTTTCAGTCCATTTCTGTCAAGCAAATATGTCGCTCAGTAAAATTAATACTTTTTGTGACAAAATTGCTAACTTTTTTGCAGCATTAACGTCGAGTTTGTCGCGGGAGAAGGATGTATAAAGTACTTATGCAAGTACATGATGAAAGGAGCGGACATGGCCTTTGTCCAAGTCACGGATGCCAACACGGGCCAAAGTGCGCTGAACTACGACGAACTGCAGCAAATTCG\n++\n+!333;>HCDHHIIIYIIINTTYYYYTTTTTTYYYYYYNIIIIIININNTONB81+++04HQYTTTTTTTNIIINNTTNTTTTTTTTYYYTTTTTYTTTTTTYYYYYYYYYTTTTTTYYYYYTIIIIIITTTTTTTTTNNNNNNTNNTTTNNNNNNNNNNNNNNNNTTTTTYYTNNJJJJLYYYYYYYYYTTTTTTYTNNNNNNTYTTTTTTTTYYYYYYYYYYYYYYYYYYYYYYYYYYTTTTTTYYYYYYYYYYYYTNNNNNNTTYYYTNNNNNTTTNNNNTTYYYYYYYYYYYYYYYYYYYYYYTNNNNNTYYYYYYYYYYYYYYYYYYYYTNNNNNTYYYYYTTTTTTYYYYYYYYYYYYYYYYYYYYYTTTTTTYYYYYYYYYYYTKKKTNNIIINTYYYYYYYYYYYYYYTTTTTTYYYYYYYYYYYYYYYYYTTTTTTOIICBBOQQQQQQC;<88:>>>CIFOYYYYYYQQQQQQQQQCCQQQQHCBAA:AAAAIIA>;A>AAAIC>>AAAACA>>>>III>::>AAACCCIIIA:;==<IIIIIQQAA<:::IA==::8::CQIIIIAA>>CI92\n+@WTSI_1055_1g01.q1kpIBR bases 1 to 350\n+TATGACTGATTACGCCAGCTATTTAGGTGAGACTATAGAATACTCACGCTAGCATGCCTGCAGGTCGACTCTAGAGGATCCCAGGATTGCTTTTTGGCTCGCATACTGCAGCCTGGGGAAGTAGTTGACGTTTTGAAGAATTGAGGGAAGTTGACGTGAAACGGCAACGCGGAGCAGGTCGGAAATCGCTTCGCTATCAGAGCCAAGCAACGAAATGGCGATTGCGCTTAAAAAACATTGGTTTGCTTAAAACATCAATGGTCTTCACCGGTAGAAGCAGTCGCCTAGACCAACGTTGTTGACGCAACGAATGGTGTTTTGCTGCTGGGCAGACGTGGGCGGAGTGCTA\n++\n+!..+---77CBI>7---77>>>DACCCHHHIDDDDCCIHHAA84)))%%%))+,32>>HHHHCCCCCCCCCHIIIIINN<B.,,,+++2.22OBNDHHHHHIIDDDDIIYTNNNNNTTTIIIIIITTTTKKYYYYYYYYYYQOB84-,,.<>FIIIIINNNIIIKKMSSSIIIIIIIIIIIILTOOIIIIIFLLLLLLYYSKKLKKKPMSSYSYSSMSS?KKKKFFFIIFKKKKKKKKSMMMSKKIDDDKKKFDDFFFBBDD=DDMMMKDDDDDDKKFFCCKKKKKFFFKKKKFMMMMMKKKKKKKK734:4B<??B@DC=<871<1314/--,,+++++.-5:97--,\n+@WTSI_1055_1g02.p1kpIBF bases 1 to 523\n+AACGACGGCCAGTGAATTGTAATACGACTCACTATAGGGCGAATTCGAGCTCGGTACCCGGGGATCCCACGACAAATTCACGGAAGCGTCTCGCACTTTGTGCCGAGGACTGCTGCACAAGGAGCCCACTCTGAGGTTGGGCTGTCGCCGGGTCGGCCGGCCTGAGGACGGCGCGGAAGAGCTGAAGGCACACGCGTTCTTCACACAACCGGACCAGAAGACAGGCAGGGAGCCAATTCCGTGGAGGAAGATGGAGGCCGGCAAGGTGGACGACATTCCCTTCTGAACTGCTAGAGAGGACTTGTAGGAATTCCGTCCTTCAGCTGACACCTCCATTTTGTCCGGACCCCCATTCGGTGTATGCCAAAGATGTGCTGGACATCGAGCAGTTCAGCACTGTCAAGGGAGTTCGTCCGCTTCCACCAAACTTTTCCTACCTGCTGAACCATTAGGTTCGACTTGACGCGACTGACAACTCCTTCTACGACAAGTTCAACAGCGGGTCCGTGTCCATACCTTGGC\n++\n+!08<=AAA:28::87;<::>ACECEIIIIIIIIIIINIKBB>C>QQYNHHHHDDHDHIITIDCCCCOONNNNGDFDDINMINNNNNIHHHHHIINNIIINNNNTYTIIIIDDIIIIYYYTTTTTTYIIIDDDGGITYYSKKKIDNNNNTTNNNNNTYYYTLLLLLLLLLLLYYTYJJJJJNTTTTTTTTTTYYOLLLTTOOOTTTTTTTYNNNNNJJJLLLLLLYYYYYYYYYYSSYYONNNNNNLLTTTTTTTYYYYYYYYYYYYYYYYTMMKKKYYYYYYYYYYYYYTTTTTOOLIILLLLTTLNLLLLLLYYYYYYTTTLLLTTTTTTTYYYYYYTTTTTTTTTTTYYYYYYYYYYYYYYYYYNIIIIITYYTTTLTTNIIFFFMYYYYYYYOOLKKOOTIFIFIINTTTTYYYYYYYYYYYYYYYYYYYYYYTNNNNNNNNTYYYYYYYYYYTTTNNNNNNNNTNIIFFFKYYOOOOOIIIA<:77:<<>>>>IOOIHHHDDEIQMMII<924595/4\n' |
b |
diff -r 1c36cf8ef133 -r 832c1fd57852 test-data/sanger-pairs-names.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sanger-pairs-names.tabular Wed May 13 11:03:57 2015 -0400 |
b |
@@ -0,0 +1,128 @@ +WTSI_1055_1a03 +WTSI_1055_1a04 +WTSI_1055_1a05 +WTSI_1055_1a07 +WTSI_1055_1a08 +WTSI_1055_1a09 +WTSI_1055_1a10 +WTSI_1055_1a11 +WTSI_1055_1a12 +WTSI_1055_1a14 +WTSI_1055_1a15 +WTSI_1055_1a16 +WTSI_1055_1a17 +WTSI_1055_1a18 +WTSI_1055_1a20 +WTSI_1055_1a21 +WTSI_1055_1a22 +WTSI_1055_1a23 +WTSI_1055_1a24 +WTSI_1055_1b01 +WTSI_1055_1b02 +WTSI_1055_1b03 +WTSI_1055_1b04 +WTSI_1055_1b06 +WTSI_1055_1b07 +WTSI_1055_1b08 +WTSI_1055_1b09 +WTSI_1055_1b10 +WTSI_1055_1b11 +WTSI_1055_1b12 +WTSI_1055_1b13 +WTSI_1055_1b14 +WTSI_1055_1b15 +WTSI_1055_1b16 +WTSI_1055_1b17 +WTSI_1055_1b18 +WTSI_1055_1b20 +WTSI_1055_1b21 +WTSI_1055_1b22 +WTSI_1055_1b23 +WTSI_1055_1c01 +WTSI_1055_1c04 +WTSI_1055_1c05 +WTSI_1055_1c06 +WTSI_1055_1c07 +WTSI_1055_1c08 +WTSI_1055_1c09 +WTSI_1055_1c10 +WTSI_1055_1c11 +WTSI_1055_1c12 +WTSI_1055_1c13 +WTSI_1055_1c14 +WTSI_1055_1c15 +WTSI_1055_1c16 +WTSI_1055_1c17 +WTSI_1055_1c19 +WTSI_1055_1c20 +WTSI_1055_1c22 +WTSI_1055_1c24 +WTSI_1055_1d01 +WTSI_1055_1d02 +WTSI_1055_1d03 +WTSI_1055_1d04 +WTSI_1055_1d06 +WTSI_1055_1d07 +WTSI_1055_1d08 +WTSI_1055_1d09 +WTSI_1055_1d10 +WTSI_1055_1d11 +WTSI_1055_1d12 +WTSI_1055_1d13 +WTSI_1055_1d14 +WTSI_1055_1d15 +WTSI_1055_1d16 +WTSI_1055_1d17 +WTSI_1055_1d18 +WTSI_1055_1d19 +WTSI_1055_1d20 +WTSI_1055_1d21 +WTSI_1055_1d22 +WTSI_1055_1d23 +WTSI_1055_1d24 +WTSI_1055_1e01 +WTSI_1055_1e02 +WTSI_1055_1e03 +WTSI_1055_1e04 +WTSI_1055_1e05 +WTSI_1055_1e06 +WTSI_1055_1e07 +WTSI_1055_1e08 +WTSI_1055_1e09 +WTSI_1055_1e10 +WTSI_1055_1e11 +WTSI_1055_1e12 +WTSI_1055_1e13 +WTSI_1055_1e14 +WTSI_1055_1e15 +WTSI_1055_1e16 +WTSI_1055_1e17 +WTSI_1055_1e18 +WTSI_1055_1e21 +WTSI_1055_1e22 +WTSI_1055_1e23 +WTSI_1055_1e24 +WTSI_1055_1f01 +WTSI_1055_1f02 +WTSI_1055_1f03 +WTSI_1055_1f04 +WTSI_1055_1f05 +WTSI_1055_1f06 +WTSI_1055_1f08 +WTSI_1055_1f09 +WTSI_1055_1f10 +WTSI_1055_1f11 +WTSI_1055_1f12 +WTSI_1055_1f14 +WTSI_1055_1f15 +WTSI_1055_1f16 +WTSI_1055_1f17 +WTSI_1055_1f18 +WTSI_1055_1f19 +WTSI_1055_1f20 +WTSI_1055_1f21 +WTSI_1055_1f22 +WTSI_1055_1f23 +WTSI_1055_1f24 +WTSI_1055_1g01 +WTSI_1055_1g02 |
b |
diff -r 1c36cf8ef133 -r 832c1fd57852 test-data/sanger-sample.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sanger-sample.fastq Wed May 13 11:03:57 2015 -0400 |
b |
@@ -0,0 +1,12 @@ +@WTSI_1055_1a05.p1kpIBF bases 1 to 642 +CGTGCCAGTTCTAAACTGGTCGTTCAGCGCCAACCGAAGTGCATACCCTGACGAGCATACACGCAGCTGAAGCGCTCCACAAGCAGCTCTCACCACTAGTCCACGCACCACCCCGCAAGGAGACGGCACGCAGCCACGGGCAAAAGCCGCCTGTTTCACACAACAGCCCGGCTGACCCGACCTTTAGAGCCAATTCTTTTCCCGAAGTTACGAATCTAATTTGCCGACTTCCCTTACCTACATTATTCTATCGACTAGAGGCTGTTCACCTTGGAGACCTGCTGCGGATATCGGTACGATCAGGCAGGAGATTCATATCGCTTCCCTCGCATTTTCAAGGGCCGTGTGGAGCGCACGAGACACCACAGGAACCGCGGTGCTTTACGGGCGCAACATCCCTATCTCAGGCTGAGCCACTTCCAGGCACGCACGCCCTAAACCAGAAAAGAGAACTCTGGCTCGGACTCCACACGACGTCTGCGAGTTCATTTGCGTTACCGCGCGAAACAGTTCTTGCGAACCGTCATTTCCCTGGCCTGGCGTGGGAATGTTAACCCACTTCCCTTTCGGCAACCGGATGGACAAACTGCGCAAGCACAGCAAAGTCTTCATCCGTAGTGTGTGACGGCATTAGCCGGTGC ++ +!<>AIHHCCCCCCCCIIIINNNNNTTTYYYYYYYYYYTTTTIIIIHHNIIIFDKFDDINNNTTTNIIIIINTTTTTTTYYYYYYTNNNNNTTYNIIIIIINNYYYYYYYYYYYYYYYYYTNNNNNTTTTTTYYYYYYYYYYYYYYYYYTLLJJJNNTTTTYYYYYYYYYTNNJNJLLTYYYYTONJJJOOYYYYYYYYYYYYYTTTTLOJJJJOOYYYYYYYYYTTTTTTYYYTTTTTTYYYYYYYYYYYYYYYYLJJJJJTYYYTLLLTOTJJJJJKKOYYYYTJNJJJOOTOOIIIILKYYYYTINDDDEEOSYYYYYYYYYYYYYYYYYYYYYYTTLTTTTTTTINIIIOYTKB888>>KMYYIIFIIITKYYYYKKKTOTYYYYYYYYYYYYYYYYYYYKIDDDD>>444>BKLKIIGGDIOYYYYIYYYQIIII@@7507>43--/<<IAAIIII>559==A@IIB>>===KMQM??/33?BIIQQIIFCCFCCFIIICIHA?@F>:>:>>=3...08AIIIMIQQQQCCCCQC:>=:6:>:>>IICA>>>>IFCCC>:>AA>99>;>AACAA>>>::7;7AIII>>>:>>IAI>833688949>@C>:>A;98777=;>99::>4755057132+ +@WTSI_1055_1a05.q1kpIBR bases 1 to 219 +CTGTGTACAAAGGGCAGGGACGTATTCAGAGCGAGTTGATGACTCGCCCCTACAAGGAATTCCTCGTTCACGGACAATAATTGCAATGTCCGATCCCAATCACGGCAAATTTTCACCGGTTTACCAACCCCTTTCGGGGAAGGACAAGCACGCTGATTTTGCCAGTGTAGCGCGCGTGCAGCCCCGGACATCTAAGGGCATCACAGACCTGTTATTGC ++ +!>>>>>DDIFKOOTTTNDDDHHFTTOOKKKYYTTNNNIYYNNNNNNYTIIIIITIFNIDDKKKNNIIIFIITTTTNNNNNINIINGIKMYYYYYOTTTTTYKKLMMMYYYQOOAAAAIQ;7:<<<A>=AAQA>><<<>7::77::7>>IIIAAAA>:>A=>>5:88::=BIIIIIIIII>>7;9733999=8370---128999::14.,0,,0442+ +@WTSI_1055_1g02.p1kpIBF bases 1 to 523 +AACGACGGCCAGTGAATTGTAATACGACTCACTATAGGGCGAATTCGAGCTCGGTACCCGGGGATCCCACGACAAATTCACGGAAGCGTCTCGCACTTTGTGCCGAGGACTGCTGCACAAGGAGCCCACTCTGAGGTTGGGCTGTCGCCGGGTCGGCCGGCCTGAGGACGGCGCGGAAGAGCTGAAGGCACACGCGTTCTTCACACAACCGGACCAGAAGACAGGCAGGGAGCCAATTCCGTGGAGGAAGATGGAGGCCGGCAAGGTGGACGACATTCCCTTCTGAACTGCTAGAGAGGACTTGTAGGAATTCCGTCCTTCAGCTGACACCTCCATTTTGTCCGGACCCCCATTCGGTGTATGCCAAAGATGTGCTGGACATCGAGCAGTTCAGCACTGTCAAGGGAGTTCGTCCGCTTCCACCAAACTTTTCCTACCTGCTGAACCATTAGGTTCGACTTGACGCGACTGACAACTCCTTCTACGACAAGTTCAACAGCGGGTCCGTGTCCATACCTTGGC ++ +!08<=AAA:28::87;<::>ACECEIIIIIIIIIIINIKBB>C>QQYNHHHHDDHDHIITIDCCCCOONNNNGDFDDINMINNNNNIHHHHHIINNIIINNNNTYTIIIIDDIIIIYYYTTTTTTYIIIDDDGGITYYSKKKIDNNNNTTNNNNNTYYYTLLLLLLLLLLLYYTYJJJJJNTTTTTTTTTTYYOLLLTTOOOTTTTTTTYNNNNNJJJLLLLLLYYYYYYYYYYSSYYONNNNNNLLTTTTTTTYYYYYYYYYYYYYYYYTMMKKKYYYYYYYYYYYYYTTTTTOOLIILLLLTTLNLLLLLLYYYYYYTTTLLLTTTTTTTYYYYYYTTTTTTTTTTTYYYYYYYYYYYYYYYYYNIIIIITYYTTTLTTNIIFFFMYYYYYYYOOLKKOOTIFIFIINTTTTYYYYYYYYYYYYYYYYYYYYYYTNNNNNNNNTYYYYYYYYYYTTTNNNNNNNNTNIIFFFKYYOOOOOIIIA<:77:<<>>>>IOOIHHHDDEIQMMII<924595/4 |
b |
diff -r 1c36cf8ef133 -r 832c1fd57852 tools/seq_filter_by_id/README.rst --- a/tools/seq_filter_by_id/README.rst Wed Jul 30 06:39:53 2014 -0400 +++ b/tools/seq_filter_by_id/README.rst Wed May 13 11:03:57 2015 -0400 |
b |
@@ -1,15 +1,15 @@ Galaxy tool to filter FASTA, FASTQ or SFF sequences by ID ========================================================= -This tool is copyright 2010-2013 by Peter Cock, The James Hutton Institute +This tool is copyright 2010-2015 by Peter Cock, The James Hutton Institute (formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved. See the licence text below. -This tool is a short Python script (using both the Galaxy and Biopython library -functions) which divides a FASTA, FASTQ, or SFF file in two, those sequences with -or without an ID present in the specified column(s) of a tabular file. Example uses -include filtering based on search results from a tool like NCBI BLAST before -assembly. +This tool is a short Python script (using both Biopython library functions) +which divides a FASTA, FASTQ, or SFF file in two, those sequences with or +without an ID present in the specified column(s) of a tabular file. Example +uses include filtering based on search results from a tool like NCBI BLAST +before assembly. This tool is available from the Galaxy Tool Shed at: @@ -19,6 +19,7 @@ * http://toolshed.g2.bx.psu.edu/view/peterjc/seq_select_by_id * http://toolshed.g2.bx.psu.edu/view/peterjc/seq_rename +* http://toolshed.g2.bx.psu.edu/view/peterjc/seq_filter_by_mapping Automated Installation @@ -34,20 +35,20 @@ There are just two files to install to use this tool from within Galaxy: -* seq_filter_by_id.py (the Python script) -* seq_filter_by_id.xml (the Galaxy tool definition) +* ``seq_filter_by_id.py`` (the Python script) +* ``seq_filter_by_id.xml`` (the Galaxy tool definition) -The suggested location is a dedicated tools/seq_filter_by_id folder. +The suggested location is a dedicated ``tools/seq_filter_by_id`` folder. -You will also need to modify the tools_conf.xml file to tell Galaxy to offer the +You will also need to modify the ``tools_conf.xml`` file to tell Galaxy to offer the tool. One suggested location is in the filters section. Simply add the line:: <tool file="seq_filter_by_id/seq_filter_by_id.xml" /> -If you wish to run the unit tests, also add this to tools_conf.xml.sample -and move/copy the test-data files under Galaxy's test-data folder. Then:: +If you wish to run the unit tests, also move/copy the ``test-data/`` files +under Galaxy's ``test-data/`` folder. Then:: - $ ./run_functional_tests.sh -id seq_filter_by_id + $ ./run_tests.sh -id seq_filter_by_id You will also need to install Biopython 1.54 or later. That's it. @@ -72,10 +73,22 @@ - Development moved to GitHub, https://github.com/peterjc/pico_galaxy - Renamed folder and adopted README.rst naming. v0.0.7 - Correct automated dependency definition. +v0.0.8 - Simplified XML to apply input format to output data. +v0.2.0 - Can supply ID list as a text parameter (instead of in a file) + - Using ``optparse`` for the Python command line API. + - Advanced option to ignore paired read suffices. + - Updated dependencies to use Biopython 1.64. +v0.2.1 - Use Biopython instead of Galaxy for FASTQ handling. + - Tool definition now embeds citation information. + - Include input dataset name in output dataset names. + - If white space is found in the requested tabular field then only + the first word is used as the identifier (with a warning to stderr). +v0.2.2 - Use the ``format_source=...`` tag. + - Reorder XML elements (internal change only). + - Planemo for Tool Shed upload (``.shed.yml``, internal change only). ======= ====================================================================== - Developers ========== @@ -86,21 +99,35 @@ Development has now moved to a dedicated GitHub repository: https://github.com/peterjc/pico_galaxy/tree/master/tools -For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball use -the following command from the Galaxy root folder:: +For pushing a release to the test or main "Galaxy Tool Shed", use the following +Planemo commands (which requires you have set your Tool Shed access details in +``~/.planemo.yml`` and that you have access rights on the Tool Shed):: + + $ planemo shed_upload --shed_target testtoolshed --check_diff ~/repositories/pico_galaxy/tools/venn_list/ + ... + +or:: + + $ planemo shed_upload --shed_target toolshed --check_diff ~/repositories/pico_galaxy/tools/venn_list/ + ... - $ tar -czf seq_filter_by_id.tar.gz tools/seq_filter_by_id/README.rst tools/seq_filter_by_id/seq_filter_by_id.* tools/seq_filter_by_id/tool_dependencies.xml test-data/k12_ten_proteins.fasta test-data/k12_hypothetical.fasta test-data/k12_hypothetical.tabular +To just build and check the tar ball, use:: -Check this worked:: - - $ tar -tzf seq_filter_by_id.tar.gz + $ planemo shed_upload --tar_only ~/repositories/pico_galaxy/tools/venn_list/ + ... + $ tar -tzf shed_upload.tar.gz + test-data/empty_file.dat + test-data/k12_hypothetical.fasta + test-data/k12_hypothetical.tabular + test-data/k12_hypothetical_alt.tabular + test-data/k12_ten_proteins.fasta + test-data/sanger-pairs-mixed.fastq + test-data/sanger-pairs-names.tabular + test-data/sanger-sample.fastq tools/seq_filter_by_id/README.rst tools/seq_filter_by_id/seq_filter_by_id.py tools/seq_filter_by_id/seq_filter_by_id.xml tools/seq_filter_by_id/tool_dependencies.xml - test-data/k12_ten_proteins.fasta - test-data/k12_hypothetical.fasta - test-data/k12_hypothetical.tabular Licence (MIT) |
b |
diff -r 1c36cf8ef133 -r 832c1fd57852 tools/seq_filter_by_id/seq_filter_by_id.py --- a/tools/seq_filter_by_id/seq_filter_by_id.py Wed Jul 30 06:39:53 2014 -0400 +++ b/tools/seq_filter_by_id/seq_filter_by_id.py Wed May 13 11:03:57 2015 -0400 |
[ |
b'@@ -3,12 +3,8 @@\n \n Takes six command line options, tabular filename, ID column numbers (comma\n separated list using one based counting), input filename, input type (e.g.\n-FASTA or SFF) and two output filenames (for records with and without the\n-given IDs, same format as input sequence file).\n-\n-If either output filename is just a minus sign, that file is not created.\n-This is intended to allow output for just the matched (or just the non-matched)\n-records.\n+FASTA or SFF) and up to two output filenames (for records with and without\n+the given IDs, same format as input sequence file).\n \n When filtering an SFF file, any Roche XML manifest in the input file is\n preserved in both output files.\n@@ -17,9 +13,9 @@\n in column one, and the ID of the match from the database is in column two.\n Here sensible values for the column numbers would therefore be "1" or "2".\n \n-This tool is a short Python script which requires Biopython 1.54 or later\n-for SFF file support. If you use this tool in scientific work leading to a\n-publication, please cite the Biopython application note:\n+This tool is a short Python script which requires Biopython 1.54 or later.\n+If you use this tool in scientific work leading to a publication, please\n+cite the Biopython application note:\n \n Cock et al 2009. Biopython: freely available Python tools for computational\n molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.\n@@ -29,51 +25,177 @@\n (formerly the Scottish Crop Research Institute, SCRI), UK. All rights reserved.\n See accompanying text file for licence details (MIT license).\n \n-This is version 0.1.0 of the script, use -v or --version to get the version.\n+Use -v or --version to get the version, -h or --help for help.\n """\n import os\n import sys\n+import re\n+from optparse import OptionParser\n \n-def stop_err(msg, err=1):\n+def sys_exit(msg, err=1):\n sys.stderr.write(msg.rstrip() + "\\n")\n sys.exit(err)\n \n-if "-v" in sys.argv or "--version" in sys.argv:\n- print "v0.1.0"\n+#Parse Command Line\n+usage = """Use as follows:\n+\n+$ python seq_filter_by_id.py [options] tab1 cols1 [, tab2 cols2, ...]\n+\n+e.g. Positive matches using column one from tabular file:\n+\n+$ seq_filter_by_id.py -i my_seqs.fastq -f fastq -p matches.fastq ids.tabular 1\n+\n+Multiple tabular files and column numbers may be given, or replaced with\n+the -t or --text option.\n+"""\n+parser = OptionParser(usage=usage)\n+parser.add_option(\'-i\', \'--input\', dest=\'input\',\n+ default=None, help=\'Input sequences filename\',\n+ metavar="FILE")\n+parser.add_option(\'-f\', \'--format\', dest=\'format\',\n+ default=None,\n+ help=\'Input sequence format (e.g. fasta, fastq, sff)\')\n+parser.add_option(\'-t\', \'--text\', dest=\'id_list\',\n+ default=None, help="Lists of white space separated IDs (instead of a tabular file)")\n+parser.add_option(\'-p\', \'--positive\', dest=\'output_positive\',\n+ default=None,\n+ help=\'Output filename for matches\',\n+ metavar="FILE")\n+parser.add_option(\'-n\', \'--negative\', dest=\'output_negative\',\n+ default=None,\n+ help=\'Output filename for non-matches\',\n+ metavar="FILE")\n+parser.add_option("-l", "--logic", dest="logic",\n+ default="UNION",\n+ help="How to combined multiple ID columns (UNION or INTERSECTION)")\n+parser.add_option("-s", "--suffix", dest="suffix",\n+ action="store_true",\n+ help="Ignore pair-read suffices for matching names")\n+parser.add_option("-v", "--version", dest="version",\n+ default=False, action="store_true",\n+ help="Show version and quit")\n+\n+options, args = parser.parse_args()\n+\n+if options.version:\n+ print "v0.2.1"\n sys.exit(0)\n \n-#Parse Command Line\n-if len(sys.argv) - 1 < 7 or len(sys.argv) % 2 == 1:\n- stop_err("Expected 7 or more arguments, 5 required "\n- "(in seq, seq format'..b' with isn\'t so efficient,\n #but this makes the code simple.\n pos_count = neg_count = 0\n- if out_positive_file != "-":\n+ if out_positive_file is not None:\n out_handle = open(out_positive_file, "wb")\n writer = SffWriter(out_handle, xml=manifest)\n in_handle.seek(0) #start again after getting manifest\n- pos_count = writer.write_file(rec for rec in SffIterator(in_handle) if rec.id in ids)\n+ pos_count = writer.write_file(rec for rec in SffIterator(in_handle) if clean_name(rec.id) in ids)\n out_handle.close()\n- if out_negative_file != "-":\n+ if out_negative_file is not None:\n out_handle = open(out_negative_file, "wb")\n writer = SffWriter(out_handle, xml=manifest)\n in_handle.seek(0) #start again\n- neg_count = writer.write_file(rec for rec in SffIterator(in_handle) if rec.id not in ids)\n+ neg_count = writer.write_file(rec for rec in SffIterator(in_handle) if clean_name(rec.id) not in ids)\n out_handle.close()\n #And we\'re done\n in_handle.close()\n #At the time of writing, Galaxy doesn\'t show SFF file read counts,\n #so it is useful to put them in stdout and thus shown in job info.\n- print "%i with and %i without specified IDs" % (pos_count, neg_count)\n+ return pos_count, neg_count\n+\n+\n+if seq_format.lower()=="sff":\n+ # Now write filtered SFF file based on IDs wanted\n+ pos_count, neg_count = sff_filter(in_file, out_positive_file, out_negative_file, ids)\n+ # At the time of writing, Galaxy doesn\'t show SFF file read counts,\n+ # so it is useful to put them in stdout and thus shown in job info.\n elif seq_format.lower()=="fasta":\n- #Write filtered FASTA file based on IDs from tabular file\n+ # Write filtered FASTA file based on IDs from tabular file\n pos_count, neg_count = fasta_filter(in_file, out_positive_file, out_negative_file, ids)\n print "%i with and %i without specified IDs" % (pos_count, neg_count)\n elif seq_format.lower().startswith("fastq"):\n- #Write filtered FASTQ file based on IDs from tabular file\n- from galaxy_utils.sequence.fastq import fastqReader, fastqWriter\n- reader = fastqReader(open(in_file, "rU"))\n- if out_positive_file != "-" and out_negative_file != "-":\n- print "Generating two FASTQ files"\n- positive_writer = fastqWriter(open(out_positive_file, "w"))\n- negative_writer = fastqWriter(open(out_negative_file, "w"))\n- for record in reader:\n- #The [1:] is because the fastaReader leaves the > on the identifier.\n- if record.identifier and record.identifier.split()[0][1:] in ids:\n- positive_writer.write(record)\n- else:\n- negative_writer.write(record)\n- positive_writer.close()\n- negative_writer.close()\n- elif out_positive_file != "-":\n- print "Generating matching FASTQ file"\n- positive_writer = fastqWriter(open(out_positive_file, "w"))\n- for record in reader:\n- #The [1:] is because the fastaReader leaves the > on the identifier.\n- if record.identifier and record.identifier.split()[0][1:] in ids:\n- positive_writer.write(record)\n- positive_writer.close()\n- elif out_negative_file != "-":\n- print "Generating non-matching FASTQ file"\n- negative_writer = fastqWriter(open(out_negative_file, "w"))\n- for record in reader:\n- #The [1:] is because the fastaReader leaves the > on the identifier.\n- if not record.identifier or record.identifier.split()[0][1:] not in ids:\n- negative_writer.write(record)\n- negative_writer.close()\n- reader.close()\n+ # Write filtered FASTQ file based on IDs from tabular file\n+ fastq_filter(in_file, out_positive_file, out_negative_file, ids)\n+ # This does not currently track the counts\n else:\n- stop_err("Unsupported file type %r" % seq_format)\n+ sys_exit("Unsupported file type %r" % seq_format)\n' |
b |
diff -r 1c36cf8ef133 -r 832c1fd57852 tools/seq_filter_by_id/seq_filter_by_id.xml --- a/tools/seq_filter_by_id/seq_filter_by_id.xml Wed Jul 30 06:39:53 2014 -0400 +++ b/tools/seq_filter_by_id/seq_filter_by_id.xml Wed May 13 11:03:57 2015 -0400 |
b |
b'@@ -1,34 +1,65 @@\n-<tool id="seq_filter_by_id" name="Filter sequences by ID" version="0.0.6">\n+<tool id="seq_filter_by_id" name="Filter sequences by ID" version="0.2.2">\n <description>from a tabular file</description>\n <requirements>\n- <requirement type="package" version="1.62">biopython</requirement>\n+ <requirement type="package" version="1.64">biopython</requirement>\n <requirement type="python-module">Bio</requirement>\n </requirements>\n- <version_command interpreter="python">seq_filter_by_id.py --version</version_command>\n- <command interpreter="python">\n-seq_filter_by_id.py "$input_file" "$input_file.ext"\n-#if $output_choice_cond.output_choice=="both"\n- $output_pos $output_neg\n-#elif $output_choice_cond.output_choice=="pos"\n- $output_pos -\n-#elif $output_choice_cond.output_choice=="neg"\n- - $output_neg\n-#end if\n-## TODO - Decide on best way to expose multiple ID files via the XML wrapper.\n-## Single tabular file, can call the Python script with either UNION or INTERSECTION\n-UNION "$input_tabular" "$columns"\n- </command>\n <stdio>\n <!-- Anything other than zero is an error -->\n <exit_code range="1:" />\n <exit_code range=":-1" />\n </stdio>\n+ <version_command interpreter="python">seq_filter_by_id.py --version</version_command>\n+ <command interpreter="python">\n+seq_filter_by_id.py -i "$input_file" -f "$input_file.ext"\n+#if $output_choice_cond.output_choice=="both"\n+ -p $output_pos -n $output_neg\n+#elif $output_choice_cond.output_choice=="pos"\n+ -p $output_pos\n+#elif $output_choice_cond.output_choice=="neg"\n+ -n $output_neg\n+#end if\n+#if $adv_opts.adv_opts_selector=="advanced" and $adv_opts.strip_suffix\n+ -s\n+#end if\n+#if $id_opts.id_opts_selector=="tabular":\n+## TODO - Decide on best way to expose multiple ID files via the XML wrapper.\n+## Single tabular file, can call the Python script with either UNION or INTERSECTION\n+-l UNION "$id_opts.input_tabular" "$id_opts.columns"\n+#else\n+-t "$id_opts.id_list"\n+#end if\n+ </command>\n <inputs>\n- <param name="input_file" type="data" format="fasta,fastq,sff" label="Sequence file to filter on the identifiers" help="FASTA, FASTQ, or SFF format." />\n- <param name="input_tabular" type="data" format="tabular" label="Tabular file containing sequence identifiers"/>\n- <param name="columns" type="data_column" data_ref="input_tabular" multiple="True" numerical="False" label="Column(s) containing sequence identifiers" help="Multi-select list - hold the appropriate key while clicking to select multiple columns">\n- <validator type="no_options" message="Pick at least one column"/>\n- </param>\n+ <param name="input_file" type="data" format="fasta,fastq,sff" label="Sequence file to be filtered" help="FASTA, FASTQ, or SFF format." />\n+ <conditional name="id_opts">\n+ <param name="id_opts_selector" type="select" label="Filter using the ID list from">\n+ <option value="tabular" selected="True">tabular file</option>\n+ <option value="list">provided list</option>\n+ <!-- add UNION or INTERSECTION of multiple tabular files here? -->\n+ </param>\n+ <when value="tabular">\n+ <param name="input_tabular" type="data" format="tabular" label="Tabular file containing sequence identifiers"/>\n+ <param name="columns" type="data_column" data_ref="input_tabular" multiple="True" numerical="False"\n+ label="Column(s) containing sequence identifiers"\n+ help="Multi-select list - hold the appropriate key while clicking to select multiple columns">\n+ <validator type="no_options" message="Pick at least one column"/>\n+ </param>\n+ </when>\n+ <when value="list">\n+ <param name="id_list" type="text" size="20x80" area="True" format="tabular"\n+ label="List of sequence i'..b'hoice"] != "pos"</filter>\n </data>\n </outputs>\n@@ -75,6 +98,52 @@\n <param name="output_choice" value="pos" />\n <output name="output_pos" file="k12_hypothetical.fasta" ftype="fasta" />\n </test>\n+ <test>\n+ <param name="input_file" value="k12_ten_proteins.fasta" ftype="fasta" />\n+ <param name="input_tabular" value="k12_hypothetical_alt.tabular" ftype="tabular" />\n+ <param name="columns" value="1" />\n+ <param name="output_choice" value="pos" />\n+ <param name="adv_opts_selector" value="advanced" />\n+ <param name="strip_suffix" value="true" />\n+ <output name="output_pos" file="k12_hypothetical.fasta" ftype="fasta" />\n+ </test>\n+ <test>\n+ <param name="input_file" value="k12_ten_proteins.fasta" ftype="fasta" />\n+ <param name="id_opts_selector" value="list" />\n+ <param name="id_list" value="gi|16127999|ref|NP_414546.1|" />\n+ <param name="output_choice" value="pos" />\n+ <output name="output_pos" file="k12_hypothetical.fasta" ftype="fasta" />\n+ </test>\n+ <test>\n+ <param name="input_file" value="sanger-pairs-mixed.fastq" ftype="fastq" />\n+ <param name="id_opts_selector" value="list" />\n+ <param name="id_list" value="WTSI_1055_1a05 WTSI_1055_1g02" />\n+ <param name="output_choice" value="pos" />\n+ <param name="adv_opts_selector" value="advanced" />\n+ <param name="strip_suffix" value="true" />\n+ <output name="output_pos" file="sanger-sample.fastq" ftype="fastq" />\n+ </test>\n+ <test>\n+ <param name="input_file" value="sanger-pairs-mixed.fastq" ftype="fastq" />\n+ <param name="id_opts_selector" value="tabular" />\n+ <param name="input_tabular" value="sanger-pairs-names.tabular" ftype="tabular" />\n+ <param name="columns" value="1" />\n+ <param name="output_choice" value="both" />\n+ <param name="adv_opts_selector" value="advanced" />\n+ <param name="strip_suffix" value="true" />\n+ <output name="output_pos" file="sanger-pairs-mixed.fastq" ftype="fastq" />\n+\t <output name="output_neg" file="empty_file.dat" ftype="fastq" />\n+ </test>\n+ <test>\n+ <param name="input_file" value="sanger-pairs-mixed.fastq" ftype="fastq" />\n+ <param name="input_tabular" value="sanger-pairs-names.tabular" ftype="tabular" />\n+ <param name="columns" value="1" />\n+ <param name="output_choice" value="both" />\n+ <param name="adv_opts_selector" value="advanced" />\n+ <param name="strip_suffix" value="false" />\n+ <output name="output_pos" file="empty_file.dat" ftype="fastq" />\n+ <output name="output_neg" file="sanger-pairs-mixed.fastq" ftype="fastq" />\n+ </test>\n </tests>\n <help>\n **What it does**\n@@ -84,6 +153,11 @@\n specified. You can opt to have a single output file of just the matching records,\n or just the non-matching ones.\n \n+Instead of providing the identifiers in a tabular file, you can alternatively\n+provide them as a parameter (type or paste them into the text box). This is a\n+useful shortcut for extracting a few sequences of interest without first having\n+to prepare a tabular file.\n+\n Note that the order of sequences in the original sequence file is preserved, as\n is any Roche XML Manifest in an SFF file. Also, if any sequences share an\n identifier (which would be very unusual in SFF files), duplicates are not removed.\n@@ -122,4 +196,8 @@\n This tool is available to install into other Galaxy Instances via the Galaxy\n Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/seq_filter_by_id\n </help>\n+ <citations>\n+ <citation type="doi">10.7717/peerj.167</citation>\n+ <citation type="doi">10.1093/bioinformatics/btp163</citation>\n+ </citations>\n </tool>\n' |
b |
diff -r 1c36cf8ef133 -r 832c1fd57852 tools/seq_filter_by_id/tool_dependencies.xml --- a/tools/seq_filter_by_id/tool_dependencies.xml Wed Jul 30 06:39:53 2014 -0400 +++ b/tools/seq_filter_by_id/tool_dependencies.xml Wed May 13 11:03:57 2015 -0400 |
b |
@@ -1,6 +1,6 @@ <?xml version="1.0"?> <tool_dependency> - <package name="biopython" version="1.62"> - <repository changeset_revision="3e82cbc44886" name="package_biopython_1_62" owner="biopython" toolshed="http://toolshed.g2.bx.psu.edu" /> + <package name="biopython" version="1.64"> + <repository changeset_revision="5477a05cc158" name="package_biopython_1_64" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu" /> </package> </tool_dependency> |