Mercurial > repos > peterjc > seq_filter_by_id
diff tools/filters/seq_filter_by_id.xml @ 1:262f08104540 draft
Uploaded v0.0.4 which includes a unit test and is faster at filtering FASTA files with large records (e.g. whole chromosomes)
author | peterjc |
---|---|
date | Mon, 15 Apr 2013 12:27:30 -0400 |
parents | 5844f6a450ed |
children | abdd608c869b |
line wrap: on
line diff
--- a/tools/filters/seq_filter_by_id.xml Tue Jun 07 17:24:30 2011 -0400
+++ b/tools/filters/seq_filter_by_id.xml Mon Apr 15 12:27:30 2013 -0400
@@ -1,5 +1,6 @@
-<tool id="seq_filter_by_id" name="Filter sequences by ID" version="0.0.1">
+<tool id="seq_filter_by_id" name="Filter sequences by ID" version="0.0.4">
 <description>from a tabular file</description>
+ <version_command interpreter="python">seq_filter_by_id.py --version</version_command>
 <command interpreter="python">
 seq_filter_by_id.py $input_tabular $columns $input_file $input_file.ext
 #if $output_choice_cond.output_choice=="both"
@@ -11,7 +12,7 @@
 #end if
 </command>
 <inputs>
- <param name="input_file" type="data" format="fasta,fastq,sff" label="Sequence file to filter on the identifiers" description="FASTA, FASTQ, or SFF format." />
+ <param name="input_file" type="data" format="fasta,fastq,sff" label="Sequence file to filter on the identifiers" help="FASTA, FASTQ, or SFF format." />
 <param name="input_tabular" type="data" format="tabular" label="Tabular file containing sequence identifiers"/>
 <param name="columns" type="data_column" data_ref="input_tabular" multiple="True" numerical="False" label="Column(s) containing sequence identifiers" help="Multi-select list - hold the appropriate key while clicking to select multiple columns">
 <validator type="no_options" message="Pick at least one column"/>
@@ -55,6 +56,13 @@
 </data>
 </outputs>
 <tests>
+ <test>
+ <param name="input_file" value="k12_ten_proteins.fasta" ftype="fasta" />
+ <param name="input_tabular" value="k12_hypothetical.tabular" ftype="tabular" />
+ <param name="columns" value="1" />
+ <param name="output_choice" value="pos" />
+ <output name="output_pos" file="k12_hypothetical.fasta" ftype="fasta" />
+ </test>
 </tests>
 <requirements>
 <requirement type="python-module">Bio</requirement>
@@ -80,7 +88,7 @@
 reads without BLAST matches (i.e. those which do not match your contaminant
 database).
 
-You may have a file of FASTA sequences which has been run some some analysis
+You may have a file of FASTA sequences which has been used with some analysis
 tool giving tabular output, which has then been filtered on some criteria.
 You can then use this tool to divide the original FASTA file into those entries
 matching or not matching your criteria (those with or without their identifier