Mercurial > repos > peterjc > seq_length
annotate tools/seq_length/seq_length.xml @ 5:ea3c01e08251 draft default tip
Remove legacy tool_dependencies.xml
author | peterjc |
---|---|
date | Thu, 30 Nov 2023 09:58:47 +0000 |
parents | 17caf7a7c2c5 |
children |
rev | line source |
---|---|
4 | 1 <tool id="seq_length" name="Sequence lengths" version="0.0.5"> |
1 | 2 <description>from FASTA, QUAL, FASTQ, or SFF file</description> |
0 | 3 <requirements> |
4 | 4 <requirement type="package" version="1.81">biopython</requirement> |
0 | 5 </requirements> |
6 <version_command> | |
7 python $__tool_directory__/seq_length.py --version | |
8 </version_command> | |
9 <command detect_errors="aggressive"> | |
2 | 10 python $__tool_directory__/seq_length.py -i '$input_file' -f '$input_file.ext' -o '$output_file' |
3
fcdf11fb34de
v0.0.4 More statistics including optional N50 and median
peterjc
parents:
2
diff
changeset
|
11 #if $stats |
fcdf11fb34de
v0.0.4 More statistics including optional N50 and median
peterjc
parents:
2
diff
changeset
|
12 -s |
fcdf11fb34de
v0.0.4 More statistics including optional N50 and median
peterjc
parents:
2
diff
changeset
|
13 #end if |
0 | 14 </command> |
15 <inputs> | |
16 <param name="input_file" type="data" format="fasta,qual,fastq,sff" label="Sequence file" help="FASTA, QUAL, FASTQ, or SFF format." /> | |
3
fcdf11fb34de
v0.0.4 More statistics including optional N50 and median
peterjc
parents:
2
diff
changeset
|
17 <param name="stats" type="boolean" label="Compute additional statistics (median, N50)" /> |
0 | 18 </inputs> |
19 <outputs> | |
20 <data name="output_file" format="tabular" label="${on_string} length"/> | |
21 </outputs> | |
22 <tests> | |
23 <test> | |
24 <param name="input_file" value="four_human_proteins.fasta" ftype="fasta" /> | |
25 <output name="output_file" file="four_human_proteins.length.tabular" ftype="tabular" /> | |
2 | 26 <assert_stdout> |
3
fcdf11fb34de
v0.0.4 More statistics including optional N50 and median
peterjc
parents:
2
diff
changeset
|
27 <has_line line="4 sequences, total length 3297, mean 824.2" /> |
fcdf11fb34de
v0.0.4 More statistics including optional N50 and median
peterjc
parents:
2
diff
changeset
|
28 <has_line line="Shortest 348, longest 1382" /> |
2 | 29 </assert_stdout> |
0 | 30 </test> |
31 <test> | |
32 <param name="input_file" value="SRR639755_sample_strict.fastq" ftype="fastq" /> | |
33 <output name="output_file" file="SRR639755_sample_strict.length.tabular" ftype="tabular" /> | |
2 | 34 <assert_stdout> |
3
fcdf11fb34de
v0.0.4 More statistics including optional N50 and median
peterjc
parents:
2
diff
changeset
|
35 <has_line line="2 sequences, total length 202, mean 101.0" /> |
fcdf11fb34de
v0.0.4 More statistics including optional N50 and median
peterjc
parents:
2
diff
changeset
|
36 <has_line line="Shortest 101, longest 101" /> |
2 | 37 </assert_stdout> |
38 </test> | |
39 <test> | |
40 <param name="input_file" value="MID4_GLZRM4E04_rnd30.sff" ftype="sff" /> | |
3
fcdf11fb34de
v0.0.4 More statistics including optional N50 and median
peterjc
parents:
2
diff
changeset
|
41 <param name="stats" value="true" /> |
2 | 42 <output name="output_file" file="MID4_GLZRM4E04_rnd30.length.tabular" ftype="tabular" /> |
43 <assert_stdout> | |
3
fcdf11fb34de
v0.0.4 More statistics including optional N50 and median
peterjc
parents:
2
diff
changeset
|
44 <has_line line="30 sequences, total length 7504, mean 250.1" /> |
fcdf11fb34de
v0.0.4 More statistics including optional N50 and median
peterjc
parents:
2
diff
changeset
|
45 <has_line line="Shortest 42, longest 473" /> |
fcdf11fb34de
v0.0.4 More statistics including optional N50 and median
peterjc
parents:
2
diff
changeset
|
46 <has_line line="Median length 269.5, N50 345" /> |
2 | 47 </assert_stdout> |
0 | 48 </test> |
49 </tests> | |
50 <help> | |
51 **What it does** | |
52 | |
53 Takes a FASTA, QUAL, FASTQ or Standard Flowgram Format (SFF) file and produces a | |
54 two-column tabular file containing one line per sequence giving the sequence | |
55 identifier and the associated sequence's length. | |
56 | |
3
fcdf11fb34de
v0.0.4 More statistics including optional N50 and median
peterjc
parents:
2
diff
changeset
|
57 Additionally, the tool will report some basic statistics about the sequences |
fcdf11fb34de
v0.0.4 More statistics including optional N50 and median
peterjc
parents:
2
diff
changeset
|
58 (visible via the output file's metadata, or the stdout log for the job), |
fcdf11fb34de
v0.0.4 More statistics including optional N50 and median
peterjc
parents:
2
diff
changeset
|
59 namely the number of sequences, total length, mean length, minimum length and |
fcdf11fb34de
v0.0.4 More statistics including optional N50 and median
peterjc
parents:
2
diff
changeset
|
60 maximum length. |
fcdf11fb34de
v0.0.4 More statistics including optional N50 and median
peterjc
parents:
2
diff
changeset
|
61 |
fcdf11fb34de
v0.0.4 More statistics including optional N50 and median
peterjc
parents:
2
diff
changeset
|
62 You can optionally request additional statistics (namely the median and N50), |
fcdf11fb34de
v0.0.4 More statistics including optional N50 and median
peterjc
parents:
2
diff
changeset
|
63 which will use more RAM and take fractionally longer to compute. |
fcdf11fb34de
v0.0.4 More statistics including optional N50 and median
peterjc
parents:
2
diff
changeset
|
64 |
0 | 65 WARNING: If there are any duplicate sequence identifiers, these will all appear |
66 in the tabular output. | |
67 | |
3
fcdf11fb34de
v0.0.4 More statistics including optional N50 and median
peterjc
parents:
2
diff
changeset
|
68 If using SFF files, this will use the trimmed lengths of the reads. |
fcdf11fb34de
v0.0.4 More statistics including optional N50 and median
peterjc
parents:
2
diff
changeset
|
69 |
0 | 70 **References** |
71 | |
72 This tool uses Biopython's ``SeqIO`` library to read sequences, so please cite | |
73 the Biopython application note (and Galaxy too of course): | |
74 | |
75 Cock et al. (2009). Biopython: freely available Python tools for computational | |
76 molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3. | |
4 | 77 https://doi.org/10.1093/bioinformatics/btp163 pmid:19304878. |
0 | 78 |
79 This tool is available to install into other Galaxy instances via the Galaxy | |
80 Tool Shed at https://toolshed.g2.bx.psu.edu/view/peterjc/seq_length | |
81 </help> | |
82 <citations> | |
83 <citation type="doi">10.1093/bioinformatics/btp163</citation> | |
84 </citations> | |
85 </tool> |