<?xml version="1.0" encoding="UTF-8"?>
<tool id="seq_composition" name="Sequence composition" version="0.0.2">
    <!--
        Galaxy tool wrapper around seq_composition.py: takes one or more
        sequence datasets and produces a single tabular output.
    -->
    <description>Count bases or amino-acids</description>
    <!-- Dependencies declared for the wrapped script. -->
    <requirements>
        <requirement type="package" version="1.64">biopython</requirement>
        <requirement type="python-module">Bio</requirement>
    </requirements>
    <stdio>
        <!-- Anything other than zero is an error -->
        <!-- The two ranges together cover every positive and every negative exit code. -->
        <exit_code range="1:" />
        <exit_code range=":-1" />
    </stdio>
    <version_command interpreter="python">seq_composition.py --version</version_command>
    <!--
        Cheetah template for the command line. The loop emits, for every
        selected input dataset, one option named after that dataset's
        extension followed by the quoted dataset reference. The template
        lines are deliberately kept unindented.
    -->
    <command interpreter="python">
seq_composition.py -o "$output_file"
##For loop over inputs
#for i in $input_file
--$i.ext "${i}"
#end for
    </command>
    <inputs>
        <!-- multiple="true": several datasets may be selected for a single run. -->
        <param name="input_file" type="data" format="fasta,fastq,sff" multiple="true" label="Sequence file" help="FASTA, FASTQ, or SFF format." />
    </inputs>
    <outputs>
        <data name="output_file" format="tabular" label="Sequence composition ${on_string}"/>
    </outputs>
    <!--
        One test per input flavour: FASTA, FASTQ, the same FASTQ file declared
        as fastqsanger (same expected output), and SFF.
    -->
    <tests>
        <test>
            <param name="input_file" value="four_human_proteins.fasta" ftype="fasta" />
            <output name="output_file" file="four_human_proteins.seq_composition.tabular" ftype="tabular" />
        </test>
        <test>
            <param name="input_file" value="ecoli.fastq" ftype="fastq" />
            <output name="output_file" file="ecoli.seq_composition.tabular" ftype="tabular" />
        </test>
        <test>
            <param name="input_file" value="ecoli.fastq" ftype="fastqsanger" />
            <output name="output_file" file="ecoli.seq_composition.tabular" ftype="tabular" />
        </test>
        <test>
            <param name="input_file" value="MID4_GLZRM4E04_rnd30_frclip.sff" ftype="sff" />
            <output name="output_file" file="MID4_GLZRM4E04_rnd30_frclip.seq_composition.tabular" ftype="tabular"/>
        </test>
    </tests>
    <help>
**What it does**

Takes input files of sequences (typically FASTA or FASTQ, but also
Standard Flowgram Format (SFF) is supported), counts all the letters
in each sequence, and returns a summary table of their counts and
percentages.

**Citation**

This tool uses Biopython, so if you use this Galaxy tool in work leading to a
scientific publication please cite the following paper:

Cock et al (2009). Biopython: freely available Python tools for computational
molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.
http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878.

This tool is available to install into other Galaxy Instances via the Galaxy
Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/seq_composition
    </help>
    <citations>
        <citation type="doi">10.1093/bioinformatics/btp163</citation>
    </citations>
</tool>