comparison tools/seq_select_by_id/seq_select_by_id.xml @ 6:91f55ee8fea5 draft

v0.0.11; more tests and assorted minor changes
author peterjc
date Wed, 13 May 2015 10:56:29 -0400
parents 6842c0c7bc70
children a5602454b0ad
comparison
equal deleted inserted replaced
5:1a83f5ab9e95 6:91f55ee8fea5
1 <tool id="seq_select_by_id" name="Select sequences by ID" version="0.0.6"> 1 <tool id="seq_select_by_id" name="Select sequences by ID" version="0.0.11">
2 <description>from a tabular file</description> 2 <description>from a tabular file</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="1.62">biopython</requirement> 4 <requirement type="package" version="1.62">biopython</requirement>
5 <requirement type="python-module">Bio</requirement> 5 <requirement type="python-module">Bio</requirement>
6 </requirements> 6 </requirements>
7 <version_command interpreter="python">seq_select_by_id.py --version</version_command>
8 <command interpreter="python">
9 seq_select_by_id.py $input_tabular $column $input_file $input_file.ext $output_file
10 </command>
11 <stdio> 7 <stdio>
12 <!-- Anything other than zero is an error --> 8 <!-- Anything other than zero is an error -->
13 <exit_code range="1:" /> 9 <exit_code range="1:" />
14 <exit_code range=":-1" /> 10 <exit_code range=":-1" />
15 </stdio> 11 </stdio>
12 <version_command interpreter="python">seq_select_by_id.py --version</version_command>
13 <command interpreter="python">
14 seq_select_by_id.py "$input_tabular" "$column" "$input_file" "$input_file.ext" "$output_file"
15 </command>
16 <inputs> 16 <inputs>
17 <param name="input_file" type="data" format="fasta,qual,fastq,sff" label="Sequence file to select from" help="FASTA, QUAL, FASTQ, or SFF format." /> 17 <param name="input_file" type="data" format="fasta,qual,fastq,sff" label="Sequence file to select from" help="FASTA, QUAL, FASTQ, or SFF format." />
18 <param name="input_tabular" type="data" format="tabular" label="Tabular file containing sequence identifiers"/> 18 <param name="input_tabular" type="data" format="tabular" label="Tabular file containing sequence identifiers"/>
19 <param name="column" type="data_column" data_ref="input_tabular" multiple="False" numerical="False" label="Column containing sequence identifiers"/> 19 <param name="column" type="data_column" data_ref="input_tabular" multiple="False" numerical="False" label="Column containing sequence identifiers"/>
20 </inputs> 20 </inputs>
21 <outputs> 21 <outputs>
22 <data name="output_file" format="fasta" label="Selected sequences"> 22 <data name="output_file" format_source="input_file" metadata_source="input_file" label="Selected sequences from $input_file.name"/>
23 <!-- TODO - Replace this with format="input:input_fastq" if/when that works -->
24 <change_format>
25 <when input_dataset="input_file" attribute="extension" value="sff" format="sff" />
26 <when input_dataset="input_file" attribute="extension" value="fastq" format="fastq" />
27 <when input_dataset="input_file" attribute="extension" value="fastqsanger" format="fastqsanger" />
28 <when input_dataset="input_file" attribute="extension" value="fastqsolexa" format="fastqsolexa" />
29 <when input_dataset="input_file" attribute="extension" value="fastqillumina" format="fastqillumina" />
30 <when input_dataset="input_file" attribute="extension" value="fastqcssanger" format="fastqcssanger" />
31 </change_format>
32 </data>
33 </outputs> 23 </outputs>
34 <tests> 24 <tests>
35 <test> 25 <test>
36 <param name="input_file" value="k12_ten_proteins.fasta" ftype="fasta" /> 26 <param name="input_file" value="k12_ten_proteins.fasta" ftype="fasta" />
37 <param name="input_tabular" value="k12_hypothetical.tabular" ftype="tabular" /> 27 <param name="input_tabular" value="k12_hypothetical.tabular" ftype="tabular" />
38 <param name="column" value="1" /> 28 <param name="column" value="1" />
39 <output name="output_file" file="k12_hypothetical.fasta" ftype="fasta" /> 29 <output name="output_file" file="k12_hypothetical.fasta" ftype="fasta" />
30 <assert_stdout>
31 <has_line line="Indexed 10 sequences" />
32 <has_line line="Selected 1 sequences by ID" />
33 </assert_stdout>
34 </test>
35 <!-- this version has white space in the identifier column (id and description) -->
36 <test>
37 <param name="input_file" value="k12_ten_proteins.fasta" ftype="fasta" />
38 <param name="input_tabular" value="k12_hypothetical_alt.tabular" ftype="tabular" />
39 <param name="column" value="1" />
40 <output name="output_file" file="k12_hypothetical.fasta" ftype="fasta" />
41 <assert_stdout>
42 <has_line line="Indexed 10 sequences" />
43 <has_line line="Selected 1 sequences by ID" />
44 </assert_stdout>
45 <assert_stderr>
46 <has_line line="WARNING: Some of your identifiers had white space in them, using first word only. e.g.:" />
47 </assert_stderr>
48 </test>
49 <test expect_failure="true" expect_exit_code="1">
50 <param name="input_file" value="empty.fasta" ftype="fasta" />
51 <param name="input_tabular" value="k12_hypothetical.tabular" ftype="tabular" />
52 <param name="column" value="1" />
53 <assert_stdout>
54 <has_line line="Indexed 0 sequences" />
55 </assert_stdout>
56 <assert_stderr>
57 <has_line line="Identifier 'gi|16127999|ref|NP_414546.1|' not found in sequence file" />
58 </assert_stderr>
40 </test> 59 </test>
41 </tests> 60 </tests>
42 <help> 61 <help>
43 **What it does** 62 **What it does**
44 63
67 http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878. 86 http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878.
68 87
69 This tool is available to install into other Galaxy Instances via the Galaxy 88 This tool is available to install into other Galaxy Instances via the Galaxy
70 Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/seq_select_by_id 89 Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/seq_select_by_id
71 </help> 90 </help>
91 <citations>
92 <citation type="doi">10.7717/peerj.167</citation>
93 <citation type="doi">10.1093/bioinformatics/btp163</citation>
94 </citations>
72 </tool> 95 </tool>