Mercurial > repos > peterjc > seq_filter_by_id

diff tools/filters/seq_filter_by_id.xml @ 1:262f08104540 draft
Uploaded v0.0.4, which includes a unit test and is faster at filtering FASTA files with large records (e.g. whole chromosomes).
author: peterjc
date: Mon, 15 Apr 2013 12:27:30 -0400
parents: 5844f6a450ed
children: abdd608c869b
--- a/tools/filters/seq_filter_by_id.xml	Tue Jun 07 17:24:30 2011 -0400
+++ b/tools/filters/seq_filter_by_id.xml	Mon Apr 15 12:27:30 2013 -0400
@@ -1,5 +1,6 @@
-<tool id="seq_filter_by_id" name="Filter sequences by ID" version="0.0.1">
+<tool id="seq_filter_by_id" name="Filter sequences by ID" version="0.0.4">
 	<description>from a tabular file</description>
+	<version_command interpreter="python">seq_filter_by_id.py --version</version_command>
 	<command interpreter="python">
 seq_filter_by_id.py $input_tabular $columns $input_file $input_file.ext
 #if $output_choice_cond.output_choice=="both"
@@ -11,7 +12,7 @@
 #end if
 	</command>
 	<inputs>
-		<param name="input_file" type="data" format="fasta,fastq,sff" label="Sequence file to filter on the identifiers" description="FASTA, FASTQ, or SFF format." />
+		<param name="input_file" type="data" format="fasta,fastq,sff" label="Sequence file to filter on the identifiers" help="FASTA, FASTQ, or SFF format." />
 		<param name="input_tabular" type="data" format="tabular" label="Tabular file containing sequence identifiers"/>
 		<param name="columns" type="data_column" data_ref="input_tabular" multiple="True" numerical="False" label="Column(s) containing sequence identifiers" help="Multi-select list - hold the appropriate key while clicking to select multiple columns">
 			<validator type="no_options" message="Pick at least one column"/>
@@ -55,6 +56,13 @@
 		</data>
 	</outputs>
 	<tests>
+		<test>
+			<param name="input_file" value="k12_ten_proteins.fasta" ftype="fasta" />
+			<param name="input_tabular" value="k12_hypothetical.tabular" ftype="tabular" />
+			<param name="columns" value="1" />
+			<param name="output_choice" value="pos" />
+			<output name="output_pos" file="k12_hypothetical.fasta" ftype="fasta" />
+		</test>
 	</tests>
 	<requirements>
 		<requirement type="python-module">Bio</requirement>
@@ -80,7 +88,7 @@
 reads without BLAST matches (i.e. those which do not match your contaminant
 database).
 
-You may have a file of FASTA sequences which has been run some some analysis
+You may have a file of FASTA sequences which has been used with some analysis
 tool giving tabular output, which has then been filtered on some criteria.
 You can then use this tool to divide the original FASTA file into those entries
 matching or not matching your criteria (those with or without their identifier
author	peterjc
date	Mon, 15 Apr 2013 12:27:30 -0400
parents	5844f6a450ed
children	abdd608c869b