Mercurial > repos > peterjc > seq_filter_by_id
comparison tools/filters/seq_filter_by_id.xml @ 1:262f08104540 draft
Uploaded v0.0.4 which includes a unit test and is faster at filtering FASTA files with large records (e.g. whole chromosomes)
author | peterjc |
---|---|
date | Mon, 15 Apr 2013 12:27:30 -0400 |
parents | 5844f6a450ed |
children | abdd608c869b |
comparison
equal
deleted
inserted
replaced
0:5844f6a450ed | 1:262f08104540 |
---|---|
1 <tool id="seq_filter_by_id" name="Filter sequences by ID" version="0.0.1"> | 1 <tool id="seq_filter_by_id" name="Filter sequences by ID" version="0.0.4"> |
2 <description>from a tabular file</description> | 2 <description>from a tabular file</description> |
| | 3 <version_command interpreter="python">seq_filter_by_id.py --version</version_command> |
3 <command interpreter="python"> | 4 <command interpreter="python"> |
4 seq_filter_by_id.py $input_tabular $columns $input_file $input_file.ext | 5 seq_filter_by_id.py $input_tabular $columns $input_file $input_file.ext |
5 #if $output_choice_cond.output_choice=="both" | 6 #if $output_choice_cond.output_choice=="both" |
6 $output_pos $output_neg | 7 $output_pos $output_neg |
7 #elif $output_choice_cond.output_choice=="pos" | 8 #elif $output_choice_cond.output_choice=="pos" |
9 #elif $output_choice_cond.output_choice=="neg" | 10 #elif $output_choice_cond.output_choice=="neg" |
10 - $output_neg | 11 - $output_neg |
11 #end if | 12 #end if |
12 </command> | 13 </command> |
13 <inputs> | 14 <inputs> |
14 <param name="input_file" type="data" format="fasta,fastq,sff" label="Sequence file to filter on the identifiers" description="FASTA, FASTQ, or SFF format." /> | 15 <param name="input_file" type="data" format="fasta,fastq,sff" label="Sequence file to filter on the identifiers" help="FASTA, FASTQ, or SFF format." /> |
15 <param name="input_tabular" type="data" format="tabular" label="Tabular file containing sequence identifiers"/> | 16 <param name="input_tabular" type="data" format="tabular" label="Tabular file containing sequence identifiers"/> |
16 <param name="columns" type="data_column" data_ref="input_tabular" multiple="True" numerical="False" label="Column(s) containing sequence identifiers" help="Multi-select list - hold the appropriate key while clicking to select multiple columns"> | 17 <param name="columns" type="data_column" data_ref="input_tabular" multiple="True" numerical="False" label="Column(s) containing sequence identifiers" help="Multi-select list - hold the appropriate key while clicking to select multiple columns"> |
17 <validator type="no_options" message="Pick at least one column"/> | 18 <validator type="no_options" message="Pick at least one column"/> |
18 </param> | 19 </param> |
19 <conditional name="output_choice_cond"> | 20 <conditional name="output_choice_cond"> |
53 </change_format> | 54 </change_format> |
54 <filter>output_choice_cond["output_choice"] != "pos"</filter> | 55 <filter>output_choice_cond["output_choice"] != "pos"</filter> |
55 </data> | 56 </data> |
56 </outputs> | 57 </outputs> |
57 <tests> | 58 <tests> |
| | 59 <test> |
| | 60 <param name="input_file" value="k12_ten_proteins.fasta" ftype="fasta" /> |
| | 61 <param name="input_tabular" value="k12_hypothetical.tabular" ftype="tabular" /> |
| | 62 <param name="columns" value="1" /> |
| | 63 <param name="output_choice" value="pos" /> |
| | 64 <output name="output_pos" file="k12_hypothetical.fasta" ftype="fasta" /> |
| | 65 </test> |
58 </tests> | 66 </tests> |
59 <requirements> | 67 <requirements> |
60 <requirement type="python-module">Bio</requirement> | 68 <requirement type="python-module">Bio</requirement> |
61 </requirements> | 69 </requirements> |
62 <help> | 70 <help> |
78 BLASTN against a database of cloning vectors or bacteria, giving you a tabular | 86 BLASTN against a database of cloning vectors or bacteria, giving you a tabular |
79 file containing read identifiers. You could use this tool to extract only the | 87 file containing read identifiers. You could use this tool to extract only the |
80 reads without BLAST matches (i.e. those which do not match your contaminant | 88 reads without BLAST matches (i.e. those which do not match your contaminant |
81 database). | 89 database). |
82 | 90 |
83 You may have a file of FASTA sequences which has been run some some analysis | 91 You may have a file of FASTA sequences which has been used with some analysis |
84 tool giving tabular output, which has then been filtered on some criteria. | 92 tool giving tabular output, which has then been filtered on some criteria. |
85 You can then use this tool to divide the original FASTA file into those entries | 93 You can then use this tool to divide the original FASTA file into those entries |
86 matching or not matching your criteria (those with or without their identifier | 94 matching or not matching your criteria (those with or without their identifier |
87 in the filtered tabular file). | 95 in the filtered tabular file). |
88 | 96 |