Mercurial > repos > peterjc > seq_filter_by_id
comparison tools/seq_filter_by_id/seq_filter_by_id.xml @ 5:832c1fd57852 draft
v0.2.2; New options for IDs via text parameter, ignore paired read suffix; misc changes
author | peterjc |
---|---|
date | Wed, 13 May 2015 11:03:57 -0400 |
parents | 44ab4c0f7683 |
children | 03e134cae41a |
comparison
equal
deleted
inserted
replaced
4:1c36cf8ef133 | 5:832c1fd57852 |
---|---|
1 <tool id="seq_filter_by_id" name="Filter sequences by ID" version="0.0.6"> | 1 <tool id="seq_filter_by_id" name="Filter sequences by ID" version="0.2.2"> |
2 <description>from a tabular file</description> | 2 <description>from a tabular file</description> |
3 <requirements> | 3 <requirements> |
4 <requirement type="package" version="1.62">biopython</requirement> | 4 <requirement type="package" version="1.64">biopython</requirement> |
5 <requirement type="python-module">Bio</requirement> | 5 <requirement type="python-module">Bio</requirement> |
6 </requirements> | 6 </requirements> |
7 <version_command interpreter="python">seq_filter_by_id.py --version</version_command> | |
8 <command interpreter="python"> | |
9 seq_filter_by_id.py "$input_file" "$input_file.ext" | |
10 #if $output_choice_cond.output_choice=="both" | |
11 $output_pos $output_neg | |
12 #elif $output_choice_cond.output_choice=="pos" | |
13 $output_pos - | |
14 #elif $output_choice_cond.output_choice=="neg" | |
15 - $output_neg | |
16 #end if | |
17 ## TODO - Decide on best way to expose multiple ID files via the XML wrapper. | |
18 ## Single tabular file, can call the Python script with either UNION or INTERSECTION | |
19 UNION "$input_tabular" "$columns" | |
20 </command> | |
21 <stdio> | 7 <stdio> |
22 <!-- Anything other than zero is an error --> | 8 <!-- Anything other than zero is an error --> |
23 <exit_code range="1:" /> | 9 <exit_code range="1:" /> |
24 <exit_code range=":-1" /> | 10 <exit_code range=":-1" /> |
25 </stdio> | 11 </stdio> |
12 <version_command interpreter="python">seq_filter_by_id.py --version</version_command> | |
13 <command interpreter="python"> | |
14 seq_filter_by_id.py -i "$input_file" -f "$input_file.ext" | |
15 #if $output_choice_cond.output_choice=="both" | |
16 -p $output_pos -n $output_neg | |
17 #elif $output_choice_cond.output_choice=="pos" | |
18 -p $output_pos | |
19 #elif $output_choice_cond.output_choice=="neg" | |
20 -n $output_neg | |
21 #end if | |
22 #if $adv_opts.adv_opts_selector=="advanced" and $adv_opts.strip_suffix | |
23 -s | |
24 #end if | |
25 #if $id_opts.id_opts_selector=="tabular": | |
26 ## TODO - Decide on best way to expose multiple ID files via the XML wrapper. | |
27 ## Single tabular file, can call the Python script with either UNION or INTERSECTION | |
28 -l UNION "$id_opts.input_tabular" "$id_opts.columns" | |
29 #else | |
30 -t "$id_opts.id_list" | |
31 #end if | |
32 </command> | |
26 <inputs> | 33 <inputs> |
27 <param name="input_file" type="data" format="fasta,fastq,sff" label="Sequence file to filter on the identifiers" help="FASTA, FASTQ, or SFF format." /> | 34 <param name="input_file" type="data" format="fasta,fastq,sff" label="Sequence file to be filtered" help="FASTA, FASTQ, or SFF format." /> |
28 <param name="input_tabular" type="data" format="tabular" label="Tabular file containing sequence identifiers"/> | 35 <conditional name="id_opts"> |
29 <param name="columns" type="data_column" data_ref="input_tabular" multiple="True" numerical="False" label="Column(s) containing sequence identifiers" help="Multi-select list - hold the appropriate key while clicking to select multiple columns"> | 36 <param name="id_opts_selector" type="select" label="Filter using the ID list from"> |
30 <validator type="no_options" message="Pick at least one column"/> | 37 <option value="tabular" selected="True">tabular file</option> |
31 </param> | 38 <option value="list">provided list</option> |
39 <!-- add UNION or INTERSECTION of multiple tabular files here? --> | |
40 </param> | |
41 <when value="tabular"> | |
42 <param name="input_tabular" type="data" format="tabular" label="Tabular file containing sequence identifiers"/> | |
43 <param name="columns" type="data_column" data_ref="input_tabular" multiple="True" numerical="False" | |
44 label="Column(s) containing sequence identifiers" | |
45 help="Multi-select list - hold the appropriate key while clicking to select multiple columns"> | |
46 <validator type="no_options" message="Pick at least one column"/> | |
47 </param> | |
48 </when> | |
49 <when value="list"> | |
50 <param name="id_list" type="text" size="20x80" area="True" format="tabular" | |
51 label="List of sequence identifiers (white space separated)" | |
52 help="You can use both spaces and new lines to separate your identifiers."> | |
53 <sanitizer> | |
54 <valid> | |
55 <!-- default includes underscore, hyphen, etc --> | |
56 <add value="%"/> | |
57 <add value="|"/> | |
58 </valid> | |
59 </sanitizer> | |
60 </param> | |
61 </when> | |
62 </conditional> | |
32 <conditional name="output_choice_cond"> | 63 <conditional name="output_choice_cond"> |
33 <param name="output_choice" type="select" label="Output positive matches, negative matches, or both?"> | 64 <param name="output_choice" type="select" label="Output positive matches, negative matches, or both?"> |
34 <option value="both">Both positive matches (ID on list) and negative matches (ID not on list), as two files</option> | 65 <option value="both">Both positive matches (ID on list) and negative matches (ID not on list), as two files</option> |
35 <option value="pos">Just positive matches (ID on list), as a single file</option> | 66 <option value="pos">Just positive matches (ID on list), as a single file</option> |
36 <option value="neg">Just negative matches (ID not on list), as a single file</option> | 67 <option value="neg">Just negative matches (ID not on list), as a single file</option> |
38 <!-- It seems we need these dummy entries here; compare this to indels/indel_sam2interval.xml --> | 69 <!-- It seems we need these dummy entries here; compare this to indels/indel_sam2interval.xml --> |
39 <when value="both" /> | 70 <when value="both" /> |
40 <when value="pos" /> | 71 <when value="pos" /> |
41 <when value="neg" /> | 72 <when value="neg" /> |
42 </conditional> | 73 </conditional> |
74 <conditional name="adv_opts"> | |
75 <param name="adv_opts_selector" type="select" label="Advanced Options"> | |
76 <option value="basic" selected="True">Hide Advanced Options</option> | |
77 <option value="advanced">Show Advanced Options</option> | |
78 </param> | |
79 <when value="basic" /> | |
80 <when value="advanced"> | |
81 <param name="strip_suffix" type="boolean" value="false" label="Remove typical pair read name suffixes when matching identifiers?" help="Will remove suffixes including Illumina /1 and /2, Roche 454 .f and .r, and assorted Sanger names like .p* and .q*" /> | |
82 </when> | |
83 </conditional> | |
43 </inputs> | 84 </inputs> |
44 <outputs> | 85 <outputs> |
45 <data name="output_pos" format="fasta" label="With matched ID"> | 86 <data name="output_pos" format_source="input_file" metadata_source="input_file" label="$input_file.name with matched ID"> |
46 <!-- TODO - Replace this with format="input:input_fastq" if/when that works --> | |
47 <change_format> | |
48 <when input_dataset="input_file" attribute="extension" value="sff" format="sff" /> | |
49 <when input_dataset="input_file" attribute="extension" value="fastq" format="fastq" /> | |
50 <when input_dataset="input_file" attribute="extension" value="fastqsanger" format="fastqsanger" /> | |
51 <when input_dataset="input_file" attribute="extension" value="fastqsolexa" format="fastqsolexa" /> | |
52 <when input_dataset="input_file" attribute="extension" value="fastqillumina" format="fastqillumina" /> | |
53 <when input_dataset="input_file" attribute="extension" value="fastqcssanger" format="fastqcssanger" /> | |
54 </change_format> | |
55 <filter>output_choice_cond["output_choice"] != "neg"</filter> | 87 <filter>output_choice_cond["output_choice"] != "neg"</filter> |
56 </data> | 88 </data> |
57 <data name="output_neg" format="fasta" label="Without matched ID"> | 89 <data name="output_neg" format_source="input_file" metadata_source="input_file" label="$input_file.name without matched ID"> |
58 <!-- TODO - Replace this with format="input:input_fastq" if/when that works --> | |
59 <change_format> | |
60 <when input_dataset="input_file" attribute="extension" value="sff" format="sff" /> | |
61 <when input_dataset="input_file" attribute="extension" value="fastq" format="fastq" /> | |
62 <when input_dataset="input_file" attribute="extension" value="fastqsanger" format="fastqsanger" /> | |
63 <when input_dataset="input_file" attribute="extension" value="fastqsolexa" format="fastqsolexa" /> | |
64 <when input_dataset="input_file" attribute="extension" value="fastqillumina" format="fastqillumina" /> | |
65 <when input_dataset="input_file" attribute="extension" value="fastqcssanger" format="fastqcssanger" /> | |
66 </change_format> | |
67 <filter>output_choice_cond["output_choice"] != "pos"</filter> | 90 <filter>output_choice_cond["output_choice"] != "pos"</filter> |
68 </data> | 91 </data> |
69 </outputs> | 92 </outputs> |
70 <tests> | 93 <tests> |
71 <test> | 94 <test> |
73 <param name="input_tabular" value="k12_hypothetical.tabular" ftype="tabular" /> | 96 <param name="input_tabular" value="k12_hypothetical.tabular" ftype="tabular" /> |
74 <param name="columns" value="1" /> | 97 <param name="columns" value="1" /> |
75 <param name="output_choice" value="pos" /> | 98 <param name="output_choice" value="pos" /> |
76 <output name="output_pos" file="k12_hypothetical.fasta" ftype="fasta" /> | 99 <output name="output_pos" file="k12_hypothetical.fasta" ftype="fasta" /> |
77 </test> | 100 </test> |
101 <test> | |
102 <param name="input_file" value="k12_ten_proteins.fasta" ftype="fasta" /> | |
103 <param name="input_tabular" value="k12_hypothetical_alt.tabular" ftype="tabular" /> | |
104 <param name="columns" value="1" /> | |
105 <param name="output_choice" value="pos" /> | |
106 <param name="adv_opts_selector" value="advanced" /> | |
107 <param name="strip_suffix" value="true" /> | |
108 <output name="output_pos" file="k12_hypothetical.fasta" ftype="fasta" /> | |
109 </test> | |
110 <test> | |
111 <param name="input_file" value="k12_ten_proteins.fasta" ftype="fasta" /> | |
112 <param name="id_opts_selector" value="list" /> | |
113 <param name="id_list" value="gi|16127999|ref|NP_414546.1|" /> | |
114 <param name="output_choice" value="pos" /> | |
115 <output name="output_pos" file="k12_hypothetical.fasta" ftype="fasta" /> | |
116 </test> | |
117 <test> | |
118 <param name="input_file" value="sanger-pairs-mixed.fastq" ftype="fastq" /> | |
119 <param name="id_opts_selector" value="list" /> | |
120 <param name="id_list" value="WTSI_1055_1a05 WTSI_1055_1g02" /> | |
121 <param name="output_choice" value="pos" /> | |
122 <param name="adv_opts_selector" value="advanced" /> | |
123 <param name="strip_suffix" value="true" /> | |
124 <output name="output_pos" file="sanger-sample.fastq" ftype="fastq" /> | |
125 </test> | |
126 <test> | |
127 <param name="input_file" value="sanger-pairs-mixed.fastq" ftype="fastq" /> | |
128 <param name="id_opts_selector" value="tabular" /> | |
129 <param name="input_tabular" value="sanger-pairs-names.tabular" ftype="tabular" /> | |
130 <param name="columns" value="1" /> | |
131 <param name="output_choice" value="both" /> | |
132 <param name="adv_opts_selector" value="advanced" /> | |
133 <param name="strip_suffix" value="true" /> | |
134 <output name="output_pos" file="sanger-pairs-mixed.fastq" ftype="fastq" /> | |
135 <output name="output_neg" file="empty_file.dat" ftype="fastq" /> | |
136 </test> | |
137 <test> | |
138 <param name="input_file" value="sanger-pairs-mixed.fastq" ftype="fastq" /> | |
139 <param name="input_tabular" value="sanger-pairs-names.tabular" ftype="tabular" /> | |
140 <param name="columns" value="1" /> | |
141 <param name="output_choice" value="both" /> | |
142 <param name="adv_opts_selector" value="advanced" /> | |
143 <param name="strip_suffix" value="false" /> | |
144 <output name="output_pos" file="empty_file.dat" ftype="fastq" /> | |
145 <output name="output_neg" file="sanger-pairs-mixed.fastq" ftype="fastq" /> | |
146 </test> | |
78 </tests> | 147 </tests> |
79 <help> | 148 <help> |
80 **What it does** | 149 **What it does** |
81 | 150 |
82 By default it divides a FASTA, FASTQ or Standard Flowgram Format (SFF) file in | 151 By default it divides a FASTA, FASTQ or Standard Flowgram Format (SFF) file in |
83 two, those sequences with or without an ID present in the tabular file column(s) | 152 two, those sequences with or without an ID present in the tabular file column(s) |
84 specified. You can opt to have a single output file of just the matching records, | 153 specified. You can opt to have a single output file of just the matching records, |
85 or just the non-matching ones. | 154 or just the non-matching ones. |
155 | |
156 Instead of providing the identifiers in a tabular file, you can alternatively | |
157 provide them as a parameter (type or paste them into the text box). This is a | |
158 useful shortcut for extracting a few sequences of interest without first having | |
159 to prepare a tabular file. | |
86 | 160 |
87 Note that the order of sequences in the original sequence file is preserved, as | 161 Note that the order of sequences in the original sequence file is preserved, as |
88 is any Roche XML Manifest in an SFF file. Also, if any sequences share an | 162 is any Roche XML Manifest in an SFF file. Also, if any sequences share an |
89 identifier (which would be very unusual in SFF files), duplicates are not removed. | 163 identifier (which would be very unusual in SFF files), duplicates are not removed. |
90 | 164 |
120 http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878. | 194 http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878. |
121 | 195 |
122 This tool is available to install into other Galaxy Instances via the Galaxy | 196 This tool is available to install into other Galaxy Instances via the Galaxy |
123 Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/seq_filter_by_id | 197 Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/seq_filter_by_id |
124 </help> | 198 </help> |
199 <citations> | |
200 <citation type="doi">10.7717/peerj.167</citation> | |
201 <citation type="doi">10.1093/bioinformatics/btp163</citation> | |
202 </citations> | |
125 </tool> | 203 </tool> |