annotate validate_fasta_database.xml @ 9:d7f67cc9a9e7 draft default tip

planemo upload commit ea998f84bf4eb1f6f6ece4eb7fc725920f9c78c4-dirty
author caleb-easterly
date Thu, 14 Sep 2017 11:13:16 -0400
parents ba8bbfd8933e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
9
d7f67cc9a9e7 planemo upload commit ea998f84bf4eb1f6f6ece4eb7fc725920f9c78c4-dirty
caleb-easterly
parents: 8
diff changeset
1 <tool id="validate_fasta_database" name="Validate FASTA Database" version="0.1.4">
6
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
2 <requirements>
9
d7f67cc9a9e7 planemo upload commit ea998f84bf4eb1f6f6ece4eb7fc725920f9c78c4-dirty
caleb-easterly
parents: 8
diff changeset
3 <requirement type="package" version="1.0">validate-fasta-database</requirement>
6
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
4 </requirements>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
5 <stdio>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
6 <exit_code range="1" level="fatal" description="Invalid FASTA headers detected, was asked to fail"/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
7 </stdio>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
8 <command detect_errors="exit_code"><![CDATA[
9
d7f67cc9a9e7 planemo upload commit ea998f84bf4eb1f6f6ece4eb7fc725920f9c78c4-dirty
caleb-easterly
parents: 8
diff changeset
9 validate-fasta-database
6
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
10 '$inFasta'
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
11 '$goodFastaOut'
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
12 '$badFastaOut'
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
13 '$crashIfInvalid'
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
14 '$checkIsProtein'
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
15 '$customLetters'
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
16 '$checkHasAccession'
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
17 '$minimumLength'
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
18 ]]></command>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
19 <inputs>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
20 <param type="data" name="inFasta" format="fasta" label="Select input FASTA dataset"/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
21 <param type="boolean" name="crashIfInvalid"
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
22 label="Fail job if invalid FASTA headers detected?"
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
23 value="false"/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
24 <param type="boolean" name="checkIsProtein"
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
25 label="Ensure that sequence is a valid amino acid sequence?"
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
26 help="Checks that the sequence only contains the 20 standard amino
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
27 acids (and optional non-standard AAs), and checks that it is not DNA or RNA"
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
28 value="true"/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
29 <param type="text" name="customLetters" value=""
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
30 label="Optional: add one-letter codes for any non-standard amino acids that you are using."
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
31 help="Anything that is not an upper case letter [A-Z] will be ignored."/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
32 <param type="boolean" name="checkHasAccession"
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
33 label="Only pass sequences with accession numbers?"
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
34 value="false"/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
35 <param type="integer" name="minimumLength"
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
36 label="Minimum length for sequences to pass"
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
37 value="0"/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
38 </inputs>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
39 <outputs>
9
d7f67cc9a9e7 planemo upload commit ea998f84bf4eb1f6f6ece4eb7fc725920f9c78c4-dirty
caleb-easterly
parents: 8
diff changeset
40 <data name="goodFastaOut" format="fasta" label="Validate FASTA ${on_string}: passed"/>
d7f67cc9a9e7 planemo upload commit ea998f84bf4eb1f6f6ece4eb7fc725920f9c78c4-dirty
caleb-easterly
parents: 8
diff changeset
41 <data name="badFastaOut" format="fasta" label="Validate FASTA ${on_string}: failed"/>
6
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
42 </outputs>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
43 <tests>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
44 <!-- test general filtering -->
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
45 <test>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
46 <param name="inFasta" value="fastaFilteringTest_IN.fasta"/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
47 <output name="goodFastaOut" file="fastaFilteringTest_OUT1.fasta" />
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
48 <output name="badFastaOut" file="fastaFilteringTest_OUT2.fasta" />
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
49 </test>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
50
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
51 <!--test filtering out genetic sequences and bad protein sequences-->
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
52 <test>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
53 <param name="inFasta" value="geneticFiltering.in"/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
54 <param name="checkIsProtein" value="true"/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
55 <output name="goodFastaOut" file="geneticFilteringGood.out"/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
56 <output name="badFastaOut" file="geneticFilteringBad.out"/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
57 </test>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
58
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
59 <test>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
60 <param name="inFasta" value="length5Filtering.in"/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
61 <param name="minimumLength" value="5"/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
62 <output name="goodFastaOut" file="length5FilteringGood.out"/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
63 <output name="badFastaOut" file="length5FilteringBad.out"/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
64 </test>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
65 </tests>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
66 <help>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
67
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
68 <![CDATA[
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
69 **Notes**
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
70
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
71 Takes a FASTA database and validates the headers using the Compomics (developers of SearchGUI and PeptideShaker) schema.
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
72 Custom FASTA databases may be in an invalid format, which causes SearchGUI to crash.
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
73
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
74 **Output**
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
75
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
76 The main output of this tool (the dataset whose label ends in ": passed") is a FASTA database that can be run through SearchGUI without error.
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
77 The failed sequences may be examined for typos and other errors.
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
78
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
79 In addition, the tool will print the databases assigned by the Compomics utility (e.g., UniProt), for a quick check of the validity of the custom FASTA database.
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
80
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
81 Sequences that may cause the tool to report an exception are those that are not valid examples of the following formats:
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
82 * UniProt
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
83 * SwissProt (starts with ">sw|" or ">SW|")
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
84 * NCBI (starts with ">gi|" or ">GI|")
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
85 * Halobacterium from Max Planck (starts with "OE")
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
86 * H Influenza, from Novartis (starts with ">hflu_")
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
87 * C Trachomatis (starts with ">C.tr\_" or "C\_trachomatis\_")
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
88 * M Tuberculosis (starts with ">M. tub")
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
89 * Saccharomyces Genome Database (contains "SGDID")
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
90 * Genome translation (ex. ">dm345\_3L-sense [2343534-234353938]")
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
91 * Genome Annotation Framework for Flexible Analysis (GAFFA) (starts with ">GAFFA")
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
92 * UPS (contains "\_HUMAN\_UPS")
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
93
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
94 Many sequences are reported as Generic, which may or may not allow for extraction of the accession number.
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
95 ]]>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
96 </help>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
97 <citations>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
98 <citation type="bibtex">
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
99 @misc{fastaValidationTool,
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
100 author = {The GalaxyP Team},
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
101 date = {22 June 2017},
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
102 title = {FASTA Database Validation Tool}
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
103 }
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
104 </citation>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
105 </citations>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
106 </tool>