annotate tools/seq_length/seq_length.xml @ 5:ea3c01e08251 draft default tip

Remove legacy tool_dependencies.xml
author peterjc
date Thu, 30 Nov 2023 09:58:47 +0000
parents 17caf7a7c2c5
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
17caf7a7c2c5 Bump Biopython dependency
peterjc
parents: 3
diff changeset
1 <tool id="seq_length" name="Sequence lengths" version="0.0.5">
1
458f987918a6 Faster FASTA and FASTQ, v0.0.2
peterjc
parents: 0
diff changeset
2 <description>from FASTA, QUAL, FASTQ, or SFF file</description>
0
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
3 <requirements>
4
17caf7a7c2c5 Bump Biopython dependency
peterjc
parents: 3
diff changeset
4 <requirement type="package" version="1.81">biopython</requirement>
0
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
5 </requirements>
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
6 <version_command>
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
7 python $__tool_directory__/seq_length.py --version
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
8 </version_command>
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
9 <command detect_errors="aggressive">
2
6f29bb9960ac v0.0.3 - Fixed SFF; more tests
peterjc
parents: 1
diff changeset
10 python $__tool_directory__/seq_length.py -i '$input_file' -f '$input_file.ext' -o '$output_file'
3
fcdf11fb34de v0.0.4 More statistics including optional N50 and median
peterjc
parents: 2
diff changeset
11 #if $stats
fcdf11fb34de v0.0.4 More statistics including optional N50 and median
peterjc
parents: 2
diff changeset
12 -s
fcdf11fb34de v0.0.4 More statistics including optional N50 and median
peterjc
parents: 2
diff changeset
13 #end if
0
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
14 </command>
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
15 <inputs>
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
16 <param name="input_file" type="data" format="fasta,qual,fastq,sff" label="Sequence file" help="FASTA, QUAL, FASTQ, or SFF format." />
3
fcdf11fb34de v0.0.4 More statistics including optional N50 and median
peterjc
parents: 2
diff changeset
17 <param name="stats" type="boolean" label="Compute additional statistics (median, N50)" />
0
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
18 </inputs>
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
19 <outputs>
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
20 <data name="output_file" format="tabular" label="${on_string} length"/>
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
21 </outputs>
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
22 <tests>
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
23 <test>
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
24 <param name="input_file" value="four_human_proteins.fasta" ftype="fasta" />
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
25 <output name="output_file" file="four_human_proteins.length.tabular" ftype="tabular" />
2
6f29bb9960ac v0.0.3 - Fixed SFF; more tests
peterjc
parents: 1
diff changeset
26 <assert_stdout>
3
fcdf11fb34de v0.0.4 More statistics including optional N50 and median
peterjc
parents: 2
diff changeset
27 <has_line line="4 sequences, total length 3297, mean 824.2" />
fcdf11fb34de v0.0.4 More statistics including optional N50 and median
peterjc
parents: 2
diff changeset
28 <has_line line="Shortest 348, longest 1382" />
2
6f29bb9960ac v0.0.3 - Fixed SFF; more tests
peterjc
parents: 1
diff changeset
29 </assert_stdout>
0
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
30 </test>
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
31 <test>
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
32 <param name="input_file" value="SRR639755_sample_strict.fastq" ftype="fastq" />
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
33 <output name="output_file" file="SRR639755_sample_strict.length.tabular" ftype="tabular" />
2
6f29bb9960ac v0.0.3 - Fixed SFF; more tests
peterjc
parents: 1
diff changeset
34 <assert_stdout>
3
fcdf11fb34de v0.0.4 More statistics including optional N50 and median
peterjc
parents: 2
diff changeset
35 <has_line line="2 sequences, total length 202, mean 101.0" />
fcdf11fb34de v0.0.4 More statistics including optional N50 and median
peterjc
parents: 2
diff changeset
36 <has_line line="Shortest 101, longest 101" />
2
6f29bb9960ac v0.0.3 - Fixed SFF; more tests
peterjc
parents: 1
diff changeset
37 </assert_stdout>
6f29bb9960ac v0.0.3 - Fixed SFF; more tests
peterjc
parents: 1
diff changeset
38 </test>
6f29bb9960ac v0.0.3 - Fixed SFF; more tests
peterjc
parents: 1
diff changeset
39 <test>
6f29bb9960ac v0.0.3 - Fixed SFF; more tests
peterjc
parents: 1
diff changeset
40 <param name="input_file" value="MID4_GLZRM4E04_rnd30.sff" ftype="sff" />
3
fcdf11fb34de v0.0.4 More statistics including optional N50 and median
peterjc
parents: 2
diff changeset
41 <param name="stats" value="true" />
2
6f29bb9960ac v0.0.3 - Fixed SFF; more tests
peterjc
parents: 1
diff changeset
42 <output name="output_file" file="MID4_GLZRM4E04_rnd30.length.tabular" ftype="tabular" />
6f29bb9960ac v0.0.3 - Fixed SFF; more tests
peterjc
parents: 1
diff changeset
43 <assert_stdout>
3
fcdf11fb34de v0.0.4 More statistics including optional N50 and median
peterjc
parents: 2
diff changeset
44 <has_line line="30 sequences, total length 7504, mean 250.1" />
fcdf11fb34de v0.0.4 More statistics including optional N50 and median
peterjc
parents: 2
diff changeset
45 <has_line line="Shortest 42, longest 473" />
fcdf11fb34de v0.0.4 More statistics including optional N50 and median
peterjc
parents: 2
diff changeset
46 <has_line line="Median length 269.5, N50 345" />
2
6f29bb9960ac v0.0.3 - Fixed SFF; more tests
peterjc
parents: 1
diff changeset
47 </assert_stdout>
0
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
48 </test>
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
49 </tests>
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
50 <help>
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
51 **What it does**
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
52
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
53 Takes a FASTA, QUAL, FASTQ or Standard Flowgram Format (SFF) file and produces a
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
54 two-column tabular file containing one line per sequence giving the sequence
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
55 identifier and the associated sequence's length.
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
56
3
fcdf11fb34de v0.0.4 More statistics including optional N50 and median
peterjc
parents: 2
diff changeset
57 Additionally, the tool will report some basic statistics about the sequences
fcdf11fb34de v0.0.4 More statistics including optional N50 and median
peterjc
parents: 2
diff changeset
58 (visible via the output file's metadata, or the stdout log for the job),
fcdf11fb34de v0.0.4 More statistics including optional N50 and median
peterjc
parents: 2
diff changeset
59 namely the number of sequences, total length, mean length, minimum length and
fcdf11fb34de v0.0.4 More statistics including optional N50 and median
peterjc
parents: 2
diff changeset
60 maximum length.
fcdf11fb34de v0.0.4 More statistics including optional N50 and median
peterjc
parents: 2
diff changeset
61
fcdf11fb34de v0.0.4 More statistics including optional N50 and median
peterjc
parents: 2
diff changeset
62 You can optionally request additional statistics (the median and N50), which
fcdf11fb34de v0.0.4 More statistics including optional N50 and median
peterjc
parents: 2
diff changeset
63 will use more RAM and take fractionally longer to compute.
fcdf11fb34de v0.0.4 More statistics including optional N50 and median
peterjc
parents: 2
diff changeset
64
0
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
65 WARNING: If there are any duplicate sequence identifiers, these will all appear
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
66 in the tabular output.
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
67
3
fcdf11fb34de v0.0.4 More statistics including optional N50 and median
peterjc
parents: 2
diff changeset
68 If using SFF files, this will use the trimmed lengths of the reads.
fcdf11fb34de v0.0.4 More statistics including optional N50 and median
peterjc
parents: 2
diff changeset
69
0
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
70 **References**
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
71
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
72 This tool uses Biopython's ``SeqIO`` library to read sequences, so please cite
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
73 the Biopython application note (and Galaxy too of course):
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
74
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
75 Cock et al. (2009). Biopython: freely available Python tools for computational
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
76 molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.
4
17caf7a7c2c5 Bump Biopython dependency
peterjc
parents: 3
diff changeset
77 https://doi.org/10.1093/bioinformatics/btp163 pmid:19304878.
0
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
78
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
79 This tool is available to install into other Galaxy Instances via the Galaxy
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
80 Tool Shed at https://toolshed.g2.bx.psu.edu/view/peterjc/seq_length
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
81 </help>
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
82 <citations>
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
83 <citation type="doi">10.1093/bioinformatics/btp163</citation>
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
84 </citations>
c323e29a8248 Initial release v0.0.1
peterjc
parents:
diff changeset
85 </tool>