comparison tools/fastq/fastq_filter_by_id.xml @ 3:e0041942a12d draft default tip

v0.0.5 - galaxy_sequence_utils dependency and other cleanups inc using MIT license
author peterjc
date Fri, 03 Feb 2017 05:34:18 -0500
parents d570cc324779
children
comparison
equal deleted inserted replaced
2:d570cc324779 3:e0041942a12d
1 <tool id="fastq_filter_by_id" name="Filter FASTQ by ID" version="0.0.4" hidden="true">
2 <description>from a tabular file</description>
3 <!-- Command line: positional arguments are the ID table, the selected column list, the FASTQ input, then the positive and negative output paths. A literal "-" is passed in place of whichever output was not requested (presumably the script treats it as "do not write this file"; confirm in fastq_filter_by_id.py). Every substituted value is single-quoted so a path containing spaces or shell metacharacters stays a single argument. --><command interpreter="python">
4 fastq_filter_by_id.py '$input_tabular' '$columns' '$input_fastq'
5 #if $output_choice_cond.output_choice=="both"
6 '$output_pos' '$output_neg'
7 #elif $output_choice_cond.output_choice=="pos"
8 '$output_pos' -
9 #elif $output_choice_cond.output_choice=="neg"
10 - '$output_neg'
11 #end if
12 </command>
13 <inputs><!-- Inputs: the FASTQ dataset to split, a tabular dataset listing identifiers, the column(s) of that table holding the identifiers, and which outputs to produce. -->
14 <param name="input_fastq" type="data" format="fastq" label="FASTQ file to filter on the identifiers"/>
15 <param name="input_tabular" type="data" format="tabular" label="Tabular file containing FASTQ identifiers"/>
16 <param name="columns" type="data_column" data_ref="input_tabular" multiple="true" numerical="false" label="Column(s) containing FASTQ identifiers" help="Multi-select list - hold the appropriate key while clicking to select multiple columns">
17 <validator type="no_options" message="Pick at least one column"/>
18 </param>
19 <conditional name="output_choice_cond">
20 <param name="output_choice" type="select" label="Output positive matches, negative matches, or both?">
21 <option value="both">Both positive matches (ID on list) and negative matches (ID not on list), as two FASTQ files</option>
22 <option value="pos">Just positive matches (ID on list), as a single FASTQ file</option>
23 <option value="neg">Just negative matches (ID not on list), as a single FASTQ file</option>
24 </param>
25 <!-- These empty <when> branches add no parameters; they appear to be required for the conditional to work (compare indels/indel_sam2interval.xml). -->
26 <when value="both" />
27 <when value="pos" />
28 <when value="neg" />
29 </conditional>
30 </inputs>
31 <outputs>
32 <data label="With matched ID" format="fastq" name="output_pos">
33 <filter>output_choice_cond["output_choice"] != "neg"</filter>
34 <!-- Declared as generic fastq; the rules below copy the extension of input_fastq for the four listed variants. TODO - Replace this with format="input:input_fastq" if/when that works -->
35 <change_format>
36 <when format="fastqcssanger" input_dataset="input_fastq" attribute="extension" value="fastqcssanger" />
37 <when format="fastqillumina" input_dataset="input_fastq" attribute="extension" value="fastqillumina" />
38 <when format="fastqsanger" input_dataset="input_fastq" attribute="extension" value="fastqsanger" />
39 <when format="fastqsolexa" input_dataset="input_fastq" attribute="extension" value="fastqsolexa" />
40 </change_format>
41 </data>
42 <data label="Without matched ID" format="fastq" name="output_neg">
43 <filter>output_choice_cond["output_choice"] != "pos"</filter>
44 <!-- Declared as generic fastq; the rules below copy the extension of input_fastq for the four listed variants. TODO - Replace this with format="input:input_fastq" if/when that works -->
45 <change_format>
46 <when format="fastqcssanger" input_dataset="input_fastq" attribute="extension" value="fastqcssanger" />
47 <when format="fastqillumina" input_dataset="input_fastq" attribute="extension" value="fastqillumina" />
48 <when format="fastqsanger" input_dataset="input_fastq" attribute="extension" value="fastqsanger" />
49 <when format="fastqsolexa" input_dataset="input_fastq" attribute="extension" value="fastqsolexa" />
50 </change_format>
51 </data>
52 </outputs>
53 <tests><!-- No functional tests are defined for this tool; it is marked hidden and its help text describes it as deprecated. -->
54 </tests>
55 <help>
56
57 **Deprecated**
58
59 This tool is now obsolete, and should not be used in future. It has been
60 replaced by a more general version covering FASTA, FASTQ and SFF in one
61 single tool.
62
63 **What it does**
64
65 By default it divides a FASTQ file in two, those sequences with or without an
66 ID present in the tabular file column(s) specified. You can opt to have a
67 single output file of just the matching records, or just the non-matching ones.
68
69 Note that the order of sequences in the original FASTQ file is preserved.
70 Also, if any sequences share an identifier, duplicates are not removed.
71
72 **Example Usage**
73
74 You may have performed some kind of contamination search, for example running
75 BLASTN against a database of cloning vectors or bacteria, giving you a tabular
76 file containing read identifiers. You could use this tool to extract only the
77 reads without BLAST matches (i.e. those which do not match your contaminant
78 database).
79
80 </help>
81 </tool>