Previous changeset 2:d570cc324779 (2011-06-07) |
Commit message:
v0.0.5 - galaxy_sequence_utils dependency and other cleanups inc using MIT license |
added:
test-data/empty_file.dat test-data/sanger-pairs-mixed.fastq test-data/sanger-pairs-names.tabular tools/fastq_filter_by_id/README.rst tools/fastq_filter_by_id/fastq_filter_by_id.py tools/fastq_filter_by_id/fastq_filter_by_id.xml tools/fastq_filter_by_id/tool_dependencies.xml |
removed:
tools/fastq/fastq_filter_by_id.py tools/fastq/fastq_filter_by_id.txt tools/fastq/fastq_filter_by_id.xml tools/fastq/fastq_filter_by_id.xml~ |
b |
diff -r d570cc324779 -r e0041942a12d test-data/sanger-pairs-mixed.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sanger-pairs-mixed.fastq Fri Feb 03 05:34:18 2017 -0500 |
b |
b"@@ -0,0 +1,800 @@\n+@WTSI_1055_1a03.p1kpIBF bases 1 to 312\n+TTGTTGAACAGCAAAAAGGTCAAGAATATGGATGTTCTCGCCATGATTTTTGTGCCATAGGCGCGCATTCACAAGGTCCATCAGTCGNTCAGCCTGCCGCAACACCACCACCAGCCGCAGCAACAACAACAGCACCAGCAGCAGCTGATCCAATCGCATGTGCCACAGAATAACACCCAAAATCAATTAGCGACGGCCGCCCTCCAGCCGGTTCAGCAGCAGAAACAGCACGAAAAATGGGATCCGATCAAAGAATTTGGGCTGCAAAAGGACGAAATGGCGTTGAAGTCACCGCCCAGCAATGTTTGTGT\n++\n+!96CBHOOTTTYYYQMK???OOTYTTTNNNYYYYNIIIFFIIIIIIIYOOOMAA62.((((*,9@MIIIIO?A3007OOOMMII::%%%::AEHIIIQYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYTOOKKKKKYMMYYYKIINNNTYYNIIIINYYYYTOLKKKOOKKKKOLTTYYYYSSSSYYYYSSSSSSMMSOOTLLLONIDDDNOTTYQQMMMMPBB9>BDOOTTQMMMMQMMMQQE:666QQYYPMMDDDADDM@B<FDBBDKKKKKKKKIGKINIFFFKDGGIDB?2/\n+@WTSI_1055_1a04.p1kpIBF bases 1 to 186\n+TTACCCGTCGGCGCCGAAAGAGCCGAAGGCTTTGTGACTGAGGCCGGACACTGTGCTGTTAAGCTGGACATTGCCCGACCTGTCGAGTGCGCCGCTCGCCGAAATTCGTTATCGCGTAAATTTATTTATTTATTTTTATTTTTTTAAATAAAAATGACGACTAATTTGTAAGGGCATAACAACAA\n++\n+!,,,./644,,,-0377<:Q777<BB<<60,+.,+,.4.,))))//15>>550007:66>>==7@71/--0:<CDBB;;49/***/***22,/+)))11===798:3.,,1488?133??BKKMODFB?BDB7447B?:8--.E:F?B77?BKKC<<322B:..<41,46>>B<<::::5116..\n+@WTSI_1055_1a04.q1kpIBR bases 1 to 359\n+TGATTACGCCAAGCTATTTAGGTGAGACTATAGAATACTCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCAGGGTACCCGACGTCCGATATCGCGAAAAATGATGTATCTAGATTTGTCAGGAAACGTCCCCGAGTCTGTTCGACAAACAAACGTTATTCCGAACTCCCAACAACAGTATTTGATTGTGTAAAAATCTCTTGGCCTGATTACTATACTTTAGACATTTTTAGTGCCTGTATTGGAGGTATTTTAGGAACTTTTGGAACGAGCTTTTATCGATTTAGGGAACTAAAAAACCGTTCCATATTCATTAGATGCTATTATTTAAAATCCGAGTCTGATTTGCGAT\n++\n+!41>;D>AA>;;=;;>>AA@@CDDAA>>>ADINIIHHDD>::79:>>FIICCCHHHHCCCCCCCCCHHHHIEA>9..''))**,,++''+)**.,,,-,00..0B+..33010701+++-1B1.,??KMOYYQQQQ<<61,))01<:CAIIIIIYYYYTYTTTTYYYYYTTTTNNKKKKYYYYYYYYYYYYPMMOKTTTTYTTTTTYNINNINTNTIIIIIIIIINNYYYYYYYTTOLKKKIIIINNNOKKKKKFFKKYYYYYYYYYYSSMMMQMYYYYYTTTTLLPIDDDDDDFFFFFFMMKKLNIDFFKQQMMMMMMMMHHFF>A>>:779=5<488>>7745/00::300+++0-\n+@WTSI_1055_1a05.p1kpIBF bases 1 to 
642\n+CGTGCCAGTTCTAAACTGGTCGTTCAGCGCCAACCGAAGTGCATACCCTGACGAGCATACACGCAGCTGAAGCGCTCCACAAGCAGCTCTCACCACTAGTCCACGCACCACCCCGCAAGGAGACGGCACGCAGCCACGGGCAAAAGCCGCCTGTTTCACACAACAGCCCGGCTGACCCGACCTTTAGAGCCAATTCTTTTCCCGAAGTTACGAATCTAATTTGCCGACTTCCCTTACCTACATTATTCTATCGACTAGAGGCTGTTCACCTTGGAGACCTGCTGCGGATATCGGTACGATCAGGCAGGAGATTCATATCGCTTCCCTCGCATTTTCAAGGGCCGTGTGGAGCGCACGAGACACCACAGGAACCGCGGTGCTTTACGGGCGCAACATCCCTATCTCAGGCTGAGCCACTTCCAGGCACGCACGCCCTAAACCAGAAAAGAGAACTCTGGCTCGGACTCCACACGACGTCTGCGAGTTCATTTGCGTTACCGCGCGAAACAGTTCTTGCGAACCGTCATTTCCCTGGCCTGGCGTGGGAATGTTAACCCACTTCCCTTTCGGCAACCGGATGGACAAACTGCGCAAGCACAGCAAAGTCTTCATCCGTAGTGTGTGACGGCATTAGCCGGTGC\n++\n+!<>AIHHCCCCCCCCIIIINNNNNTTTYYYYYYYYYYTTTTIIIIHHNIIIFDKFDDINNNTTTNIIIIINTTTTTTTYYYYYYTNNNNNTTYNIIIIIINNYYYYYYYYYYYYYYYYYTNNNNNTTTTTTYYYYYYYYYYYYYYYYYTLLJJJNNTTTTYYYYYYYYYTNNJNJLLTYYYYTONJJJOOYYYYYYYYYYYYYTTTTLOJJJJOOYYYYYYYYYTTTTTTYYYTTTTTTYYYYYYYYYYYYYYYYLJJJJJTYYYTLLLTOTJJJJJKKOYYYYTJNJJJOOTOOIIIILKYYYYTINDDDEEOSYYYYYYYYYYYYYYYYYYYYYYTTLTTTTTTTINIIIOYTKB888>>KMYYIIFIIITKYYYYKKKTOTYYYYYYYYYYYYYYYYYYYKIDDDD>>444>BKLKIIGGDIOYYYYIYYYQIIII@@7507>43--/<<IAAIIII>559==A@IIB>>===KMQM??/33?BIIQQIIFCCFCCFIIICIHA?@F>:>:>>=3...08AIIIMIQQQQCCCCQC:>=:6:>:>>IICA>>>>IFCCC>:>AA>99>;>AACAA>>>::7;7AIII>>>:>>IAI>833688949>@C>:>A;98777=;>99::>4755057132+\n+@WTSI_1055_1a05.q1kpIBR bases 1 to 219\n+CTGTGTACAAAGGGCAGGGACGTATTCAGAGCGAGTTGATGACTCGCCCCTACAAGGAATTCCTCGTTCACGGACAATAATTGCAATGTCCGATCCCAATCACGGCAAATTTTCACCGGTTTACCAACCCCTTTCGGGGAAGGACAAGCACGCTGATTTTGCCAGTGTAGCGCGCGTGCAGCCCCGGACATCTAAGGGCATCACAGACCTGTTATTGC\n++\n+!>>>>>DDIFKOOTTTNDDDHHFTTOOKKKYYTTNNNIYYNNNNNNYTIIIIITIFNIDDKKKNNIIIFIITTTTNNNNNINIINGIKMYYYYYOTTTTTYKKLMMMYYYQOOAAAAIQ;7:<<<A>=AAQA>><<<>7::77::7>>IIIAAAA>:>A=>>5:88::=BIIIIIIIII>>7;9733999=8370---128999::14.,0,,0442+\n+@WTSI_1055_1a07.p1kpIBF bases 1 to 
574\n+AACGACGGCCAGTGAATTGTAATACGACTCACTATAGGGCGATTTCGAGCTCGGTACCCGGGGATCCCACCGGTACGGAGGGAAATTTGATCATCGCGGAAGTGCTCGTTTTGATTATCTTGGTGTATGGCGTCTGTGACCTTCTTTTTCGCTGGATGGGCATCGGGGCGTACGCCTGGGGTTCGCGCTCGAGCCCCAAAATCGCCCTCACTTTCGATGACGGGCCCAGCGAACACACCCGGTCCTTGCTCGAGCTGCTGCACCGCCATGGGGTAAAAG"..b'TGATCAATCGTTGCACTCAGTGCTTTTTGATCGCCATTTTCTCCACGTCAGATTTAACCAGTCAATTTTGTCATTGGCTTCCTTTCAATGCGGTTGCTGCTTCAAAATCATCTCTTCCATTAAATTCGGGTAACGAGCCCAATGTTCTTGATGCTTCAACGAAAACTGATCAGGCGAACTGAAAGGGTGTAAAAAAGATAAAAGAAATTGTAAACGCAGCACATTGTCAAGCAAAGCAACCCAAAAAAATCGATTTTGAGTATAGTCAAAAAGGGTTACCCGTCAATGATGATCTGTTGCTGTTTGTTTGATACTCCTCCTTTCAATTTGCGATTGTTGTTGTTGCAATTGGCACGCGAA\n++\n+!88BHIQQQYYYITTTTIIINNIIIIKKKYYYYIIIIFFYOMTTTYYIIIIAA99//.1<BKKOOTYYYYTTTTNNTTINNNTTYTTNNNIIITTYTTTTTTTTYYYYYIIIIIOYYYYYYYYYYYTTTTTTNNNNTTYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYTTOTLLYYYYYYYYYTTTTTTTTTTTTTTTTYYYYYYYYYYTTTTTTYYTNNNNNTYYYYYYTTTTTTYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYOKKKOOYYYYKK???KQMMMPPPPQMMKKKMPYYYKKKKKKKKKKMMYYYYYYYYYYYYYYYYYYYYYYYYYYYYYQQQQQI51)%%)4<QQQQQQYYYYTTKTTTTTTTYYYYYYYNNNNNNYYYKKKKGGNNNNYYYYYYYYYYYQMMMMQOKKGIIKKKKYQYYYYYYYYTOOLKKIIIIIOYQQQQQQBA>:;AABAACCCIIIOIIBBIIIII:77<><AAIIIOQQIE=>>>CA>AAABBIIIIIII:00882389667>BAAA?A>77:<844>A?;4++0966.+4492000--4922./..++\n+@WTSI_1055_1g01.p1kpIBF bases 1 to 
584\n+CAAATCCTACTGGCCGGACAAAAGAAGCGGCCAAACAACGTGCTCTTCACAAGACGATCACCACCAAAAACATTCACACATGCTCAACGAGACATTGCTTGCAGGATGGCAAGTGCAGGAAGCACTTTCCGGTGCATTAGTTTACACTGACTATGTAACCTATTGTTAATTCCCTGTAGAAACCGTTTGAGTACGACACTGTGTACTCTGAAAATGCCTACCCTCGCTACAAGCGCCGCCCACCTCCGCCTTCACTCCAAGAAGCCCAGCAGAGTCCGGAATTATACGGGCGCGAAATGCAATACAAGGACCAGCGTGGCAAACTAATTCGCAAGGACAACTCTCACGTCGTGGCTTTCAGTCCATTTCTGTCAAGCAAATATGTCGCTCAGTAAAATTAATACTTTTTGTGACAAAATTGCTAACTTTTTTGCAGCATTAACGTCGAGTTTGTCGCGGGAGAAGGATGTATAAAGTACTTATGCAAGTACATGATGAAAGGAGCGGACATGGCCTTTGTCCAAGTCACGGATGCCAACACGGGCCAAAGTGCGCTGAACTACGACGAACTGCAGCAAATTCG\n++\n+!333;>HCDHHIIIYIIINTTYYYYTTTTTTYYYYYYNIIIIIININNTONB81+++04HQYTTTTTTTNIIINNTTNTTTTTTTTYYYTTTTTYTTTTTTYYYYYYYYYTTTTTTYYYYYTIIIIIITTTTTTTTTNNNNNNTNNTTTNNNNNNNNNNNNNNNNTTTTTYYTNNJJJJLYYYYYYYYYTTTTTTYTNNNNNNTYTTTTTTTTYYYYYYYYYYYYYYYYYYYYYYYYYYTTTTTTYYYYYYYYYYYYTNNNNNNTTYYYTNNNNNTTTNNNNTTYYYYYYYYYYYYYYYYYYYYYYTNNNNNTYYYYYYYYYYYYYYYYYYYYTNNNNNTYYYYYTTTTTTYYYYYYYYYYYYYYYYYYYYYTTTTTTYYYYYYYYYYYTKKKTNNIIINTYYYYYYYYYYYYYYTTTTTTYYYYYYYYYYYYYYYYYTTTTTTOIICBBOQQQQQQC;<88:>>>CIFOYYYYYYQQQQQQQQQCCQQQQHCBAA:AAAAIIA>;A>AAAIC>>AAAACA>>>>III>::>AAACCCIIIA:;==<IIIIIQQAA<:::IA==::8::CQIIIIAA>>CI92\n+@WTSI_1055_1g01.q1kpIBR bases 1 to 350\n+TATGACTGATTACGCCAGCTATTTAGGTGAGACTATAGAATACTCACGCTAGCATGCCTGCAGGTCGACTCTAGAGGATCCCAGGATTGCTTTTTGGCTCGCATACTGCAGCCTGGGGAAGTAGTTGACGTTTTGAAGAATTGAGGGAAGTTGACGTGAAACGGCAACGCGGAGCAGGTCGGAAATCGCTTCGCTATCAGAGCCAAGCAACGAAATGGCGATTGCGCTTAAAAAACATTGGTTTGCTTAAAACATCAATGGTCTTCACCGGTAGAAGCAGTCGCCTAGACCAACGTTGTTGACGCAACGAATGGTGTTTTGCTGCTGGGCAGACGTGGGCGGAGTGCTA\n++\n+!..+---77CBI>7---77>>>DACCCHHHIDDDDCCIHHAA84)))%%%))+,32>>HHHHCCCCCCCCCHIIIIINN<B.,,,+++2.22OBNDHHHHHIIDDDDIIYTNNNNNTTTIIIIIITTTTKKYYYYYYYYYYQOB84-,,.<>FIIIIINNNIIIKKMSSSIIIIIIIIIIIILTOOIIIIIFLLLLLLYYSKKLKKKPMSSYSYSSMSS?KKKKFFFIIFKKKKKKKKSMMMSKKIDDDKKKFDDFFFBBDD=DDMMMKDDDDDDKKFFCCKKKKKFFFKKKKFMMMMMKKKKKKKK734:4B<??B@DC=<871<1314/--,,+++++.-5:97--,\n+@WTSI_1055_1g02.p1kpIBF bases 1 to 
523\n+AACGACGGCCAGTGAATTGTAATACGACTCACTATAGGGCGAATTCGAGCTCGGTACCCGGGGATCCCACGACAAATTCACGGAAGCGTCTCGCACTTTGTGCCGAGGACTGCTGCACAAGGAGCCCACTCTGAGGTTGGGCTGTCGCCGGGTCGGCCGGCCTGAGGACGGCGCGGAAGAGCTGAAGGCACACGCGTTCTTCACACAACCGGACCAGAAGACAGGCAGGGAGCCAATTCCGTGGAGGAAGATGGAGGCCGGCAAGGTGGACGACATTCCCTTCTGAACTGCTAGAGAGGACTTGTAGGAATTCCGTCCTTCAGCTGACACCTCCATTTTGTCCGGACCCCCATTCGGTGTATGCCAAAGATGTGCTGGACATCGAGCAGTTCAGCACTGTCAAGGGAGTTCGTCCGCTTCCACCAAACTTTTCCTACCTGCTGAACCATTAGGTTCGACTTGACGCGACTGACAACTCCTTCTACGACAAGTTCAACAGCGGGTCCGTGTCCATACCTTGGC\n++\n+!08<=AAA:28::87;<::>ACECEIIIIIIIIIIINIKBB>C>QQYNHHHHDDHDHIITIDCCCCOONNNNGDFDDINMINNNNNIHHHHHIINNIIINNNNTYTIIIIDDIIIIYYYTTTTTTYIIIDDDGGITYYSKKKIDNNNNTTNNNNNTYYYTLLLLLLLLLLLYYTYJJJJJNTTTTTTTTTTYYOLLLTTOOOTTTTTTTYNNNNNJJJLLLLLLYYYYYYYYYYSSYYONNNNNNLLTTTTTTTYYYYYYYYYYYYYYYYTMMKKKYYYYYYYYYYYYYTTTTTOOLIILLLLTTLNLLLLLLYYYYYYTTTLLLTTTTTTTYYYYYYTTTTTTTTTTTYYYYYYYYYYYYYYYYYNIIIIITYYTTTLTTNIIFFFMYYYYYYYOOLKKOOTIFIFIINTTTTYYYYYYYYYYYYYYYYYYYYYYTNNNNNNNNTYYYYYYYYYYTTTNNNNNNNNTNIIFFFKYYOOOOOIIIA<:77:<<>>>>IOOIHHHDDEIQMMII<924595/4\n' |
b |
diff -r d570cc324779 -r e0041942a12d test-data/sanger-pairs-names.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sanger-pairs-names.tabular Fri Feb 03 05:34:18 2017 -0500 |
b |
@@ -0,0 +1,128 @@ +WTSI_1055_1a03 +WTSI_1055_1a04 +WTSI_1055_1a05 +WTSI_1055_1a07 +WTSI_1055_1a08 +WTSI_1055_1a09 +WTSI_1055_1a10 +WTSI_1055_1a11 +WTSI_1055_1a12 +WTSI_1055_1a14 +WTSI_1055_1a15 +WTSI_1055_1a16 +WTSI_1055_1a17 +WTSI_1055_1a18 +WTSI_1055_1a20 +WTSI_1055_1a21 +WTSI_1055_1a22 +WTSI_1055_1a23 +WTSI_1055_1a24 +WTSI_1055_1b01 +WTSI_1055_1b02 +WTSI_1055_1b03 +WTSI_1055_1b04 +WTSI_1055_1b06 +WTSI_1055_1b07 +WTSI_1055_1b08 +WTSI_1055_1b09 +WTSI_1055_1b10 +WTSI_1055_1b11 +WTSI_1055_1b12 +WTSI_1055_1b13 +WTSI_1055_1b14 +WTSI_1055_1b15 +WTSI_1055_1b16 +WTSI_1055_1b17 +WTSI_1055_1b18 +WTSI_1055_1b20 +WTSI_1055_1b21 +WTSI_1055_1b22 +WTSI_1055_1b23 +WTSI_1055_1c01 +WTSI_1055_1c04 +WTSI_1055_1c05 +WTSI_1055_1c06 +WTSI_1055_1c07 +WTSI_1055_1c08 +WTSI_1055_1c09 +WTSI_1055_1c10 +WTSI_1055_1c11 +WTSI_1055_1c12 +WTSI_1055_1c13 +WTSI_1055_1c14 +WTSI_1055_1c15 +WTSI_1055_1c16 +WTSI_1055_1c17 +WTSI_1055_1c19 +WTSI_1055_1c20 +WTSI_1055_1c22 +WTSI_1055_1c24 +WTSI_1055_1d01 +WTSI_1055_1d02 +WTSI_1055_1d03 +WTSI_1055_1d04 +WTSI_1055_1d06 +WTSI_1055_1d07 +WTSI_1055_1d08 +WTSI_1055_1d09 +WTSI_1055_1d10 +WTSI_1055_1d11 +WTSI_1055_1d12 +WTSI_1055_1d13 +WTSI_1055_1d14 +WTSI_1055_1d15 +WTSI_1055_1d16 +WTSI_1055_1d17 +WTSI_1055_1d18 +WTSI_1055_1d19 +WTSI_1055_1d20 +WTSI_1055_1d21 +WTSI_1055_1d22 +WTSI_1055_1d23 +WTSI_1055_1d24 +WTSI_1055_1e01 +WTSI_1055_1e02 +WTSI_1055_1e03 +WTSI_1055_1e04 +WTSI_1055_1e05 +WTSI_1055_1e06 +WTSI_1055_1e07 +WTSI_1055_1e08 +WTSI_1055_1e09 +WTSI_1055_1e10 +WTSI_1055_1e11 +WTSI_1055_1e12 +WTSI_1055_1e13 +WTSI_1055_1e14 +WTSI_1055_1e15 +WTSI_1055_1e16 +WTSI_1055_1e17 +WTSI_1055_1e18 +WTSI_1055_1e21 +WTSI_1055_1e22 +WTSI_1055_1e23 +WTSI_1055_1e24 +WTSI_1055_1f01 +WTSI_1055_1f02 +WTSI_1055_1f03 +WTSI_1055_1f04 +WTSI_1055_1f05 +WTSI_1055_1f06 +WTSI_1055_1f08 +WTSI_1055_1f09 +WTSI_1055_1f10 +WTSI_1055_1f11 +WTSI_1055_1f12 +WTSI_1055_1f14 +WTSI_1055_1f15 +WTSI_1055_1f16 +WTSI_1055_1f17 +WTSI_1055_1f18 +WTSI_1055_1f19 +WTSI_1055_1f20 +WTSI_1055_1f21 
+WTSI_1055_1f22 +WTSI_1055_1f23 +WTSI_1055_1f24 +WTSI_1055_1g01 +WTSI_1055_1g02 |
b |
diff -r d570cc324779 -r e0041942a12d tools/fastq/fastq_filter_by_id.py --- a/tools/fastq/fastq_filter_by_id.py Tue Jun 07 17:24:08 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,95 +0,0 @@ -#!/usr/bin/env python -"""Filter a FASTQ file with IDs from a tabular file, e.g. from BLAST. - -NOTE - This script is now OBSOLETE, having been replaced by a new verion -which handles FASTA, FASTQ and SFF all in one. - -Takes five command line options, tabular filename, ID column numbers -(comma separated list using one based counting), input FASTA filename, and -two output FASTA filenames (for records with and without the given IDs). - -If either output filename is just a minus sign, that file is not created. -This is intended to allow output for just the matched (or just the non-matched) -records. - -Note in the default NCBI BLAST+ tabular output, the query sequence ID is -in column one, and the ID of the match from the database is in column two. -Here sensible values for the column numbers would therefore be "1" or "2". - -This script is copyright 2010-2011 by Peter Cock, SCRI, UK. All rights reserved. -See accompanying text file for licence details (MIT/BSD style). - -This is version 0.0.4 of the script. 
-""" -import sys -from galaxy_utils.sequence.fastq import fastqReader, fastqWriter - -def stop_err( msg ): - sys.stderr.write( msg ) - sys.exit() - -#Parse Command Line -try: - tabular_file, cols_arg, in_file, out_positive_file, out_negative_file = sys.argv[1:] -except ValueError: - stop_err("Expected five arguments, got %i:\n%s" % (len(sys.argv)-1, " ".join(sys.argv))) -try: - columns = [int(arg)-1 for arg in cols_arg.split(",")] -except ValueError: - stop_err("Expected list of columns (comma separated integers), got %s" % cols_arg) - -#Read tabular file and record all specified identifiers -ids = set() -handle = open(tabular_file, "rU") -if len(columns)>1: - #General case of many columns - for line in handle: - if line.startswith("#"): - #Ignore comments - continue - parts = line.rstrip("\n").split("\t") - for col in columns: - ids.add(parts[col]) - print "Using %i IDs from %i columns of tabular file" % (len(ids), len(columns)) -else: - #Single column, special case speed up - col = columns[0] - for line in handle: - if not line.startswith("#"): - ids.add(line.rstrip("\n").split("\t")[col]) - print "Using %i IDs from tabular file" % (len(ids)) -handle.close() - -#Write filtered FASTQ file based on IDs from tabular file -reader = fastqReader(open(in_file, "rU")) -if out_positive_file != "-" and out_negative_file != "-": - print "Generating two FASTQ files" - positive_writer = fastqWriter(open(out_positive_file, "w")) - negative_writer = fastqWriter(open(out_negative_file, "w")) - for record in reader: - #The [1:] is because the fastaReader leaves the @ on the identifer. 
- if record.identifier and record.identifier.split()[0][1:] in ids: - positive_writer.write(record) - else: - negative_writer.write(record) - positive_writer.close() - negative_writer.close() -elif out_positive_file != "-": - print "Generating matching FASTQ file" - positive_writer = fastqWriter(open(out_positive_file, "w")) - for record in reader: - #The [1:] is because the fastaReader leaves the @ on the identifer. - if record.identifier and record.identifier.split()[0][1:] in ids: - positive_writer.write(record) - positive_writer.close() -elif out_negative_file != "-": - print "Generating non-matching FASTQ file" - negative_writer = fastqWriter(open(out_negative_file, "w")) - for record in reader: - #The [1:] is because the fastaReader leaves the @ on the identifer. - if not record.identifier or record.identifier.split()[0][1:] not in ids: - negative_writer.write(record) - negative_writer.close() -else: - stop_err("Neither output file requested") -reader.close() |
b |
diff -r d570cc324779 -r e0041942a12d tools/fastq/fastq_filter_by_id.txt --- a/tools/fastq/fastq_filter_by_id.txt Tue Jun 07 17:24:08 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,89 +0,0 @@ -Obsolete -======== - -This tool is now obsolete, having been replaced by a more general version -covering the FASTA, FASTQ and SFF sequence formats in a single tool. You -should only install this tool if you need to support existing workflows -which used it. - -Galaxy tool to filter FASTQ sequences by ID -=========================================== - -This tool is copyright 2010 by Peter Cock, SCRI, UK. All rights reserved. -See the licence text below. - -This tool is a short Python script (using the Galaxy library functions) which -divides a FASTQ file in two, those sequences with or without an ID present in -the specified column(s) of a tabular file. Example uses include filtering based -on search results from a tool like NCBI BLAST before assembly. - -There are just two files to install: - -* fastq_filter_by_id.py (the Python script) -* fastq_filter_by_id.xml (the Galaxy tool definition) - -The suggested location is next to the similarly named fastq_filter.py and -fastq_filter.xml files which are included with Galaxy, i.e. in the Galaxy -folder tools/fastq - -You will also need to modify the tools_conf.xml file to tell Galaxy to offer -the tool. The suggested location is next to the fastq_filter.xml entry. Simply -add the line: - -<tool file="fastq/fastq_filter_by_id.xml" /> - -That's it. 
- - -History -======= - -v0.0.1 - Initial verion (not publicly released) -v0.0.2 - Allow both, just pos or just neg output files - - Preserve the FASTQ variant in the XML wrapper -v0.0.3 - Fixed bug when generating non-matching FASTQ file only -v0.0.4 - Deprecated, marked as hidden in the XML - - -Developers -========== - -This script and related tools are being developed on the following hg branch: -http://bitbucket.org/peterjc/galaxy-central/src/tools - -This incorporates the previously used hg branch: -http://bitbucket.org/peterjc/galaxy-central/src/fasta_filter - -For making the "Galaxy Tool Shed" http://community.g2.bx.psu.edu/ tarball use -the following command from the Galaxy root folder: - -tar -czf fastq_filter_by_id.tar.gz tools/fastq/fastq_filter_by_id.* - -Check this worked: - -$ tar -tzf fastq_filter_by_id.tar.gz -fastq/fastq_filter_by_id.py -fastq/fastq_filter_by_id.txt -fastq/fastq_filter_by_id.xml - - -Licence (MIT/BSD style) -======================= - -Permission to use, copy, modify, and distribute this software and its -documentation with or without modifications and for any purpose and -without fee is hereby granted, provided that any copyright notices -appear in all copies and that both those copyright notices and this -permission notice appear in supporting documentation, and that the -names of the contributors or copyright holders not be used in -advertising or publicity pertaining to distribution of the software -without specific prior permission. 
- -THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL -WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE -CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT -OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE -OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE -OR PERFORMANCE OF THIS SOFTWARE. |
b |
diff -r d570cc324779 -r e0041942a12d tools/fastq/fastq_filter_by_id.xml --- a/tools/fastq/fastq_filter_by_id.xml Tue Jun 07 17:24:08 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,81 +0,0 @@ -<tool id="fastq_filter_by_id" name="Filter FASTQ by ID" version="0.0.4" hidden="true"> - <description>from a tabular file</description> - <command interpreter="python"> -fastq_filter_by_id.py $input_tabular $columns $input_fastq -#if $output_choice_cond.output_choice=="both" - $output_pos $output_neg -#elif $output_choice_cond.output_choice=="pos" - $output_pos - -#elif $output_choice_cond.output_choice=="neg" - - $output_neg -#end if - </command> - <inputs> - <param name="input_fastq" type="data" format="fastq" label="FASTQ file to filter on the identifiers"/> - <param name="input_tabular" type="data" format="tabular" label="Tabular file containing FASTQ identifiers"/> - <param name="columns" type="data_column" data_ref="input_tabular" multiple="True" numerical="False" label="Column(s) containing FASTA identifiers" help="Multi-select list - hold the appropriate key while clicking to select multiple columns"> - <validator type="no_options" message="Pick at least one column"/> - </param> - <conditional name="output_choice_cond"> - <param name="output_choice" type="select" label="Output positive matches, negative matches, or both?"> - <option value="both">Both positive matches (ID on list) and negative matches (ID not on list), as two FASTA files</option> - <option value="pos">Just positive matches (ID on list), as a single FASTA file</option> - <option value="neg">Just negative matches (ID not on list), as a single FASTA file</option> - </param> - <!-- Seems need these dummy entries here, compare this to indels/indel_sam2interval.xml --> - <when value="both" /> - <when value="pos" /> - <when value="neg" /> - </conditional> - </inputs> - <outputs> - <data name="output_pos" format="fastq" label="With matched ID"> - <!-- TODO - Replace this with format="input:input_fastq" if/when that works --> - <change_format> - <when input_dataset="input_fastq" attribute="extension" value="fastqsanger" format="fastqsanger" /> - <when input_dataset="input_fastq" 
attribute="extension" value="fastqsolexa" format="fastqsolexa" /> - <when input_dataset="input_fastq" attribute="extension" value="fastqillumina" format="fastqillumina" /> - <when input_dataset="input_fastq" attribute="extension" value="fastqcssanger" format="fastqcssanger" /> - </change_format> - <filter>output_choice_cond["output_choice"] != "neg"</filter> - </data> - <data name="output_neg" format="fastq" label="Without matched ID"> - <!-- TODO - Replace this with format="input:input_fastq" if/when that works --> - <change_format> - <when input_dataset="input_fastq" attribute="extension" value="fastqsanger" format="fastqsanger" /> - <when input_dataset="input_fastq" attribute="extension" value="fastqsolexa" format="fastqsolexa" /> - <when input_dataset="input_fastq" attribute="extension" value="fastqillumina" format="fastqillumina" /> - <when input_dataset="input_fastq" attribute="extension" value="fastqcssanger" format="fastqcssanger" /> - </change_format> - <filter>output_choice_cond["output_choice"] != "pos"</filter> - </data> - </outputs> - <tests> - </tests> - <help> - -**Deprecated** - -This tool is now obsolete, and should not be used in future. It has been -replaced by a more general version covering FASTA, FASTQ and SFF in one -single tool. - -**What it does** - -By default it divides a FASTQ file in two, those sequences with or without an -ID present in the tabular file column(s) specified. You can opt to have a -single output file of just the matching records, or just the non-matching ones. - -Note that the order of sequences in the original FASTA file is preserved. -Also, if any sequences share an identifier, duplicates are not removed. - -**Example Usage** - -You may have performed some kind of contamination search, for example running -BLASTN against a database of cloning vectors or bacteria, giving you a tabular -file containing read identifiers. You could use this tool to extract only the -reads without BLAST matches (i.e. 
those which do not match your contaminant -database). - - </help> -</tool> |
b |
diff -r d570cc324779 -r e0041942a12d tools/fastq/fastq_filter_by_id.xml~ --- a/tools/fastq/fastq_filter_by_id.xml~ Tue Jun 07 17:24:08 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,75 +0,0 @@ -<tool id="fastq_filter_by_id" name="Filter FASTQ by ID" version="0.0.2"> - <description>from a tabular file</description> - <command interpreter="python"> -fastq_filter_by_id.py $input_tabular $columns $input_fastq -#if $output_choice_cond.output_choice=="both" - $output_pos $output_neg -#elif $output_choice_cond.output_choice=="pos" - $output_pos - -#elif $output_choice_cond.output_choice=="neg" - - $output_neg -#end if - </command> - <inputs> - <param name="input_fastq" type="data" format="fastq" label="FASTQ file to filter on the identifiers"/> - <param name="input_tabular" type="data" format="tabular" label="Tabular file containing FASTQ identifiers"/> - <param name="columns" type="data_column" data_ref="input_tabular" multiple="True" numerical="False" label="Column(s) containing FASTA identifiers" help="Multi-select list - hold the appropriate key while clicking to select multiple columns"> - <validator type="no_options" message="Pick at least one column"/> - </param> - <conditional name="output_choice_cond"> - <param name="output_choice" type="select" label="Output positive matches, negative matches, or both?"> - <option value="both">Both positive matches (ID on list) and negative matches (ID not on list), as two FASTA files</option> - <option value="pos">Just positive matches (ID on list), as a single FASTA file</option> - <option value="neg">Just negative matches (ID not on list), as a single FASTA file</option> - </param> - <!-- Seems need these dummy entries here, compare this to indels/indel_sam2interval.xml --> - <when value="both" /> - <when value="pos" /> - <when value="neg" /> - </conditional> - </inputs> - <outputs> - <data name="output_pos" format="fastq" label="With matched ID"> - <!-- TODO - Replace this with format="input:input_fastq" if/when that works --> - <change_format> - <when input_dataset="input_fastq" attribute="extension" value="fastqsanger" format="fastqsanger" /> - <when input_dataset="input_fastq" 
attribute="extension" value="fastqsolexa" format="fastqsolexa" /> - <when input_dataset="input_fastq" attribute="extension" value="fastqillumina" format="fastqillumina" /> - <when input_dataset="input_fastq" attribute="extension" value="fastqcssanger" format="fastqcssanger" /> - </change_format> - <filter>output_choice_cond["output_choice"] != "neg"</filter> - </data> - <data name="output_neg" format="fastq" label="Without matched ID"> - <!-- TODO - Replace this with format="input:input_fastq" if/when that works --> - <change_format> - <when input_dataset="input_fastq" attribute="extension" value="fastqsanger" format="fastqsanger" /> - <when input_dataset="input_fastq" attribute="extension" value="fastqsolexa" format="fastqsolexa" /> - <when input_dataset="input_fastq" attribute="extension" value="fastqillumina" format="fastqillumina" /> - <when input_dataset="input_fastq" attribute="extension" value="fastqcssanger" format="fastqcssanger" /> - </change_format> - <filter>output_choice_cond["output_choice"] != "pos"</filter> - </data> - </outputs> - <tests> - </tests> - <help> - -**What it does** - -By default it divides a FASTQ file in two, those sequences with or without an -ID present in the tabular file column(s) specified. You can opt to have a -single output file of just the matching records, or just the non-matching ones. - -Note that the order of sequences in the original FASTA file is preserved. -Also, if any sequences share an identifier, duplicates are not removed. - -**Example Usage** - -You may have performed some kind of contamination search, for example running -BLASTN against a database of cloning vectors or bacteria, giving you a tabular -file containing read identifiers. You could use this tool to extract only the -reads without BLAST matches (i.e. those which do not match your contaminant -database). - - </help> -</tool> |
b |
diff -r d570cc324779 -r e0041942a12d tools/fastq_filter_by_id/README.rst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/fastq_filter_by_id/README.rst Fri Feb 03 05:34:18 2017 -0500 |
b |
@@ -0,0 +1,114 @@ +Obsolete +======== + +This tool is now obsolete, having been replaced by a more general version +covering the FASTA, FASTQ and SFF sequence formats in a single tool. You +should only install this tool if you need to support existing workflows +which used it. + +Galaxy tool to filter FASTQ sequences by ID +=========================================== + +This tool is copyright 2010-2017 by Peter Cock, The James Hutton Institute +(formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved. +See the licence text below (MIT licence). + +This tool is a short Python script (using the Galaxy library functions) which +divides a FASTQ file in two, those sequences with or without an ID present in +the specified column(s) of a tabular file. Example uses include filtering based +on search results from a tool like NCBI BLAST before assembly. + +There are just two files to install: + +* fastq_filter_by_id.py (the Python script) +* fastq_filter_by_id.xml (the Galaxy tool definition) + +The suggested location is next to the similarly named fastq_filter.py and +fastq_filter.xml files which are included with Galaxy, i.e. in the Galaxy +folder tools/fastq + +You will also need to modify the tools_conf.xml file to tell Galaxy to offer +the tool. The suggested location is next to the fastq_filter.xml entry. Simply +add the line: + +<tool file="fastq/fastq_filter_by_id.xml" /> + +That's it. + + +History +======= + +======= ====================================================================== +Version Changes +------- ---------------------------------------------------------------------- +v0.0.1 - Initial verion (not publicly released) +v0.0.2 - Allow both, just pos or just neg output files + - Preserve the FASTQ variant in the XML wrapper +v0.0.3 - Fixed bug when generating non-matching FASTQ file only +v0.0.4 - Deprecated, marked as hidden in the XML +v0.0.5 - Explicit dependency on ``galaxy_sequence_utils``. + - Citation information (Cock et al. 2013). 
+ - Explicitly record version via ``<version_command>``. + - Use ``format_source``/``metadata_source`` idiom for output format. + - Use standard MIT license (was previously using the MIT/BSD style + Biopython Licence Agreement). +======= ====================================================================== + + +Developers +========== + +This script and other tools for filtering FASTA, FASTQ and SFF files were +initially developed on the following hg branches: +http://bitbucket.org/peterjc/galaxy-central/src/tools +http://bitbucket.org/peterjc/galaxy-central/src/fasta_filter + +It is now under GitHub https://github.com/peterjc/pico_galaxy/ + +For pushing a release to the test or main "Galaxy Tool Shed", use the following +Planemo commands (which requires you have set your Tool Shed access details in +``~/.planemo.yml`` and that you have access rights on the Tool Shed):: + + $ planemo shed_update -t testtoolshed --check_diff tools/fastq_filter_by_id/ + ... + +or:: + + $ planemo shed_update -t toolshed --check_diff tools/fastq_filter_by_id/ + ... + +To just build and check the tar ball, use:: + + $ planemo shed_upload --tar_only tools/fastq_filter_by_id/ + ... 
+ $ tar -tzf shed_upload.tar.gz + tools/fastq_filter_by_id/README.rst + tools/fastq_filter_by_id/fastq_filter_by_id.py + tools/fastq_filter_by_id/fastq_filter_by_id.xml + tools/fastq_filter_by_id/tool_dependencies.xml + test-data/empty_file.dat + test-data/sanger-pairs-mixed.fastq + test-data/sanger-pairs-names.tabular + + +Licence (MIT) +============= + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. |
b |
diff -r d570cc324779 -r e0041942a12d tools/fastq_filter_by_id/fastq_filter_by_id.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/fastq_filter_by_id/fastq_filter_by_id.py Fri Feb 03 05:34:18 2017 -0500 |
[ |
@@ -0,0 +1,95 @@ +#!/usr/bin/env python +"""Filter a FASTQ file with IDs from a tabular file, e.g. from BLAST. + +NOTE - This script is now OBSOLETE, having been replaced by a new version +which handles FASTA, FASTQ and SFF all in one. + +Takes five command line options: tabular filename, ID column numbers +(comma separated list using one based counting), input FASTQ filename, and +two output FASTQ filenames (for records with and without the given IDs). + +If either output filename is just a minus sign, that file is not created. +This is intended to allow output for just the matched (or just the non-matched) +records. + +Note in the default NCBI BLAST+ tabular output, the query sequence ID is +in column one, and the ID of the match from the database is in column two. +Here sensible values for the column numbers would therefore be "1" or "2". + +This tool is copyright 2010-2017 by Peter Cock, The James Hutton Institute +(formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved. +See accompanying text file for licence details (MIT license). 
+""" +import sys + +if "-v" in sys.argv or "--version" in sys.argv: + print "v0.0.5" + sys.exit(0) + +from galaxy_utils.sequence.fastq import fastqReader, fastqWriter + +# Parse Command Line +try: + tabular_file, cols_arg, in_file, out_positive_file, out_negative_file = sys.argv[1:] +except ValueError: + sys.exit("Expected five arguments, got %i:\n%s" % (len(sys.argv)-1, " ".join(sys.argv))) +try: + columns = [int(arg)-1 for arg in cols_arg.split(",")] +except ValueError: + sys.exit("Expected list of columns (comma separated integers), got %s" % cols_arg) + +# Read tabular file and record all specified identifiers +ids = set() +handle = open(tabular_file, "rU") +if len(columns) > 1: + # General case of many columns + for line in handle: + if line.startswith("#"): + # Ignore comments + continue + parts = line.rstrip("\n").split("\t") + for col in columns: + ids.add(parts[col]) + print "Using %i IDs from %i columns of tabular file" % (len(ids), len(columns)) +else: + # Single column, special case speed up + col = columns[0] + for line in handle: + if not line.startswith("#"): + ids.add(line.rstrip("\n").split("\t")[col]) + print "Using %i IDs from tabular file" % (len(ids)) +handle.close() + +# Write filtered FASTQ file based on IDs from tabular file +reader = fastqReader(open(in_file, "rU")) +if out_positive_file != "-" and out_negative_file != "-": + print "Generating two FASTQ files" + positive_writer = fastqWriter(open(out_positive_file, "w")) + negative_writer = fastqWriter(open(out_negative_file, "w")) + for record in reader: + # The [1:] is because the fastaReader leaves the @ on the identifer. 
+ if record.identifier and record.identifier.split()[0][1:] in ids: + positive_writer.write(record) + else: + negative_writer.write(record) + positive_writer.close() + negative_writer.close() +elif out_positive_file != "-": + print "Generating matching FASTQ file" + positive_writer = fastqWriter(open(out_positive_file, "w")) + for record in reader: + # The [1:] is because the fastqReader leaves the @ on the identifier. + if record.identifier and record.identifier.split()[0][1:] in ids: + positive_writer.write(record) + positive_writer.close() +elif out_negative_file != "-": + print "Generating non-matching FASTQ file" + negative_writer = fastqWriter(open(out_negative_file, "w")) + for record in reader: + # The [1:] is because the fastqReader leaves the @ on the identifier. + if not record.identifier or record.identifier.split()[0][1:] not in ids: + negative_writer.write(record) + negative_writer.close() +else: + sys.exit("Neither output file requested") +reader.close() |
b |
diff -r d570cc324779 -r e0041942a12d tools/fastq_filter_by_id/fastq_filter_by_id.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/fastq_filter_by_id/fastq_filter_by_id.xml Fri Feb 03 05:34:18 2017 -0500 |
[ |
@@ -0,0 +1,82 @@ +<tool id="fastq_filter_by_id" name="Filter FASTQ by ID" version="0.0.5" hidden="true"> + <description>from a tabular file</description> + <requirements> + <requirement type="package" version="1.0.1">galaxy_sequence_utils</requirement> + </requirements> + <version_command interpreter="python">fastq_filter_by_id.py --version</version_command> + <command interpreter="python"> +fastq_filter_by_id.py $input_tabular $columns $input_fastq +#if $output_choice_cond.output_choice=="both" + $output_pos $output_neg +#elif $output_choice_cond.output_choice=="pos" + $output_pos - +#elif $output_choice_cond.output_choice=="neg" + - $output_neg +#end if + </command> + <inputs> + <param name="input_fastq" type="data" format="fastq" label="FASTQ file to filter on the identifiers"/> + <param name="input_tabular" type="data" format="tabular" label="Tabular file containing FASTQ identifiers"/> + <param name="columns" type="data_column" data_ref="input_tabular" multiple="True" numerical="False" label="Column(s) containing FASTA identifiers" help="Multi-select list - hold the appropriate key while clicking to select multiple columns"> + <validator type="no_options" message="Pick at least one column"/> + </param> + <conditional name="output_choice_cond"> + <param name="output_choice" type="select" label="Output positive matches, negative matches, or both?"> + <option value="both">Both positive matches (ID on list) and negative matches (ID not on list), as two FASTA files</option> + <option value="pos">Just positive matches (ID on list), as a single FASTA file</option> + <option value="neg">Just negative matches (ID not on list), as a single FASTA file</option> + </param> + <!-- Seems need these dummy entries here, compare this to indels/indel_sam2interval.xml --> + <when value="both" /> + <when value="pos" /> + <when value="neg" /> + </conditional> + </inputs> + <outputs> + <data name="output_pos" format_source="input_fastq" metadata_source="input_fastq" label="With 
matched ID"> + <filter>output_choice_cond["output_choice"] != "neg"</filter> + </data> + <data name="output_neg" format_source="input_fastq" metadata_source="input_fastq" label="Without matched ID"> + <filter>output_choice_cond["output_choice"] != "pos"</filter> + </data> + </outputs> + <tests> + <test> + <param name="input_fastq" value="sanger-pairs-mixed.fastq" ftype="fastq" /> + <param name="input_tabular" value="sanger-pairs-names.tabular" ftype="tabular" /> + <param name="columns" value="1" /> + <param name="output_choice" value="both" /> + <output name="output_pos" file="empty_file.dat" ftype="fastq" /> + <output name="output_neg" file="sanger-pairs-mixed.fastq" ftype="fastq" /> + </test> + </tests> + <help> + +**Deprecated** + +This tool is now obsolete, and should not be used in future. It has been +replaced by a more general version covering FASTA, FASTQ and SFF in one +single tool. + +**What it does** + +By default it divides a FASTQ file in two, those sequences with or without an +ID present in the tabular file column(s) specified. You can opt to have a +single output file of just the matching records, or just the non-matching ones. + +Note that the order of sequences in the original FASTA file is preserved. +Also, if any sequences share an identifier, duplicates are not removed. + +**Example Usage** + +You may have performed some kind of contamination search, for example running +BLASTN against a database of cloning vectors or bacteria, giving you a tabular +file containing read identifiers. You could use this tool to extract only the +reads without BLAST matches (i.e. those which do not match your contaminant +database). + + </help> + <citations> + <citation type="doi">10.7717/peerj.167</citation> + </citations> +</tool> |
b |
diff -r d570cc324779 -r e0041942a12d tools/fastq_filter_by_id/tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/fastq_filter_by_id/tool_dependencies.xml Fri Feb 03 05:34:18 2017 -0500 |
b |
@@ -0,0 +1,6 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="galaxy_sequence_utils" version="1.0.1"> + <repository changeset_revision="c1ab450748ba" name="package_galaxy_sequence_utils_1_0_1" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency> |