annotate validate_fasta_database.xml @ 8:ba8bbfd8933e draft

planemo upload commit cdc8b7437c69c33b17b1b100445db0d4d6e8ddde
author caleb-easterly
date Mon, 17 Jul 2017 16:51:38 -0400
parents bad73d1d7345
children d7f67cc9a9e7
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
1 <tool id="validate_fasta_database" name="Validate FASTA Database" version="0.1.3">
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
2 <requirements>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
3 </requirements>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
4 <stdio>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
5 <exit_code range="1" level="fatal" description="Invalid FASTA headers detected, was asked to fail"/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
6 </stdio>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
7 <command detect_errors="exit_code"><![CDATA[
8
ba8bbfd8933e planemo upload commit cdc8b7437c69c33b17b1b100445db0d4d6e8ddde
caleb-easterly
parents: 6
diff changeset
8 java -jar $__tool_directory__/validate_fasta_database-0.2.jar
6
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
9 '$inFasta'
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
10 '$goodFastaOut'
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
11 '$badFastaOut'
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
12 '$crashIfInvalid'
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
13 '$checkIsProtein'
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
14 '$customLetters'
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
15 '$checkHasAccession'
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
16 '$minimumLength'
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
17 ]]></command>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
18 <inputs>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
19 <param type="data" name="inFasta" format="fasta" label="Select input FASTA dataset"/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
20 <param type="boolean" name="crashIfInvalid"
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
21 label="Fail job if invalid FASTA headers detected?"
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
22 value="false"/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
23 <param type="boolean" name="checkIsProtein"
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
24 label="Ensure that sequence is a valid amino acid sequence?"
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
25 help="Checks that sequence only contains the 20 standard amino
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
26 acids (and optional non-standard AAs), and checks that it is not DNA or RNA"
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
27 value="true"/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
28 <param type="text" name="customLetters" value=""
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
29 label="Optional: add one-letter codes for any non-standard amino acids that you are using."
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
30 help="Anything that is not an upper case letter [A-Z] will be ignored."/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
31 <param type="boolean" name="checkHasAccession"
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
32 label="Only pass sequences with accession numbers?"
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
33 value="false"/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
34 <param type="integer" name="minimumLength"
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
35 label="Minimum length for sequences to pass"
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
36 value="0"/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
37
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
38
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
39 <!--<conditional name="checkLength">-->
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
40 <!--<param type="boolean" name="checkLength" label="Filter out sequences below a minimum sequenceLength?">-->
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
41 <!--<option value="true"></option>-->
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
42 <!--<option value="false"></option>-->
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
43 <!--</param>-->
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
44 <!--<when value="true">-->
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
45 <!--<param name="minimumLength" type="integer" value="0" label="Minimum sequenceLength that AA sequence must have"/>-->
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
46 <!--</when>-->
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
47 <!--<when value="false">-->
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
48 <!--</when>-->
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
49 <!--</conditional>-->
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
50
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
51 </inputs>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
52 <outputs>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
53 <data name="goodFastaOut" format="fasta" label="Validate FASTA: Passed Sequences"/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
54 <data name="badFastaOut" format="fasta" label="Validate FASTA: Failed Sequences"/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
55 </outputs>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
56 <tests>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
57 <!-- test general filtering -->
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
58 <test>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
59 <param name="inFasta" value="fastaFilteringTest_IN.fasta"/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
60 <output name="goodFastaOut" file="fastaFilteringTest_OUT1.fasta" />
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
61 <output name="badFastaOut" file="fastaFilteringTest_OUT2.fasta" />
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
62 </test>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
63
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
64 <!--test filtering out genetic sequences and bad protein sequences-->
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
65 <test>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
66 <param name="inFasta" value="geneticFiltering.in"/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
67 <param name="checkIsProtein" value="true"/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
68 <output name="goodFastaOut" file="geneticFilteringGood.out"/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
69 <output name="badFastaOut" file="geneticFilteringBad.out"/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
70 </test>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
71
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
72 <test>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
73 <param name="inFasta" value="length5Filtering.in"/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
74 <param name="minimumLength" value="5"/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
75 <output name="goodFastaOut" file="length5FilteringGood.out"/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
76 <output name="badFastaOut" file="length5FilteringBad.out"/>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
77 </test>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
78 </tests>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
79 <help>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
80
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
81 <![CDATA[
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
82 **Notes**
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
83
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
84 Takes a FASTA database and validates the headers using the Compomics (developers of SearchGUI and PeptideShaker) schema.
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
85 Custom FASTA databases may be in an invalid format, which causes SearchGUI to crash.
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
86
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
87 **Output**
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
88
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
89 The main output of this tool, "Validate FASTA: Passed Sequences", is a FASTA database that can be run through SearchGUI without error.
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
90 The failed sequences may be examined for typos and other errors.
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
91
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
92 In addition, the tool will print the databases assigned by the Compomics utility (e.g., UniProt), for a quick check of the validity of the custom FASTA database.
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
93
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
94 Sequences that may cause the tool to report an exception are those that are not valid examples of the following formats:
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
95 * UniProt
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
96 * SwissProt (starts with ">sw|" or ">SW|")
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
97 * NCBI (starts with ">gi|" or ">GI|")
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
98 * Halobacterium from Max Planck (starts with "OE")
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
99 * H Influenza, from Novartis (starts with ">hflu_")
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
100 * C Trachomatis (starts with ">C.tr\_" or "C\_trachomatis\_")
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
101 * M Tuberculosis (starts with ">M. tub")
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
102 * Saccharomyces Genome Database (contains "SGDID")
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
103 * Genome translation (ex. ">dm345\_3L-sense [2343534-234353938]")
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
104 * Genome Annotation Framework for Flexible Analysis (GAFFA) (starts with ">GAFFA")
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
105 * UPS (contains "\_HUMAN\_UPS")
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
106
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
107 Many sequences are reported as Generic, which may or may not allow for extraction of the accession number.
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
108 ]]>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
109 </help>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
110 <citations>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
111 <citation type="bibtex">
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
112 @misc{fastaValidationTool,
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
113 author = {The GalaxyP Team},
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
114 date = {22 June 2017},
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
115 title = {FASTA Database Validation Tool}
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
116 }
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
117 </citation>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
118 </citations>
bad73d1d7345 planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff changeset
119 </tool>