<tool id="seq_length" name="Sequence lengths" version="0.0.3">
    <description>from FASTA, QUAL, FASTQ, or SFF file</description>
    <!--
        Galaxy wrapper for seq_length.py, which is expected to sit in the same
        directory as this XML file. The script is given the input dataset, the
        input's Galaxy datatype extension as the format name, and the path of
        the tabular output dataset.
    -->
    <requirements>
        <!-- This is currently the last release of Biopython which is available via Galaxy's legacy XML packaging system -->
        <requirement type="package" version="1.67">biopython</requirement>
    </requirements>
    <!--
        The script path is single-quoted (like the other path arguments) so
        that a tool directory containing spaces is still passed to python as
        one argument. The text inside version_command, command and help is
        deliberately left unindented because whitespace there is content.
    -->
    <version_command>
python '$__tool_directory__/seq_length.py' --version
    </version_command>
    <command detect_errors="aggressive">
python '$__tool_directory__/seq_length.py' -i '$input_file' -f '$input_file.ext' -o '$output_file'
    </command>
    <inputs>
        <param name="input_file" type="data" format="fasta,qual,fastq,sff" label="Sequence file" help="FASTA, QUAL, FASTQ, or SFF format." />
    </inputs>
    <outputs>
        <data name="output_file" format="tabular" label="${on_string} length"/>
    </outputs>
    <!--
        One test per exercised input format (FASTA, FASTQ, SFF). Each compares
        the tabular output with a stored expected file and checks the summary
        line the script prints to stdout.
    -->
    <tests>
        <test>
            <param name="input_file" value="four_human_proteins.fasta" ftype="fasta" />
            <output name="output_file" file="four_human_proteins.length.tabular" ftype="tabular" />
            <assert_stdout>
                <has_line line="4 sequences, total length 3297" />
            </assert_stdout>
        </test>
        <test>
            <param name="input_file" value="SRR639755_sample_strict.fastq" ftype="fastq" />
            <output name="output_file" file="SRR639755_sample_strict.length.tabular" ftype="tabular" />
            <assert_stdout>
                <has_line line="2 sequences, total length 202" />
            </assert_stdout>
        </test>
        <test>
            <param name="input_file" value="MID4_GLZRM4E04_rnd30.sff" ftype="sff" />
            <output name="output_file" file="MID4_GLZRM4E04_rnd30.length.tabular" ftype="tabular" />
            <assert_stdout>
                <has_line line="30 sequences, total length 7504" />
            </assert_stdout>
        </test>
    </tests>
    <help>
**What it does**

Takes a FASTA, QUAL, FASTQ or Standard Flowgram Format (SFF) file and produces a
two-column tabular file containing one line per sequence giving the sequence
identifier and the associated sequence's length.

WARNING: If there are any duplicate sequence identifiers, these will all appear
in the tabular output.

**References**

This tool uses Biopython's ``SeqIO`` library to read sequences, so please cite
the Biopython application note (and Galaxy too of course):

Cock et al (2009). Biopython: freely available Python tools for computational
molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.
http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878.

This tool is available to install into other Galaxy Instances via the Galaxy
Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/seq_length
    </help>
    <citations>
        <citation type="doi">10.1093/bioinformatics/btp163</citation>
    </citations>
</tool>