Mercurial > repos > caleb-easterly > validate_fasta_database
annotate validate_fasta_database.xml @ 8:ba8bbfd8933e draft
planemo upload commit cdc8b7437c69c33b17b1b100445db0d4d6e8ddde
author | caleb-easterly |
---|---|
date | Mon, 17 Jul 2017 16:51:38 -0400 |
parents | bad73d1d7345 |
children | d7f67cc9a9e7 |
rev | line source |
---|---|
6
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
1 <tool id="validate_fasta_database" name="Validate FASTA Database" version="0.1.3"> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
2 <requirements> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
3 </requirements> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
4 <stdio> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
5 <exit_code range="1" level="fatal" description="Invalid FASTA headers detected, was asked to fail"/> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
6 </stdio> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
7 <command detect_errors="exit_code"><![CDATA[ |
8
ba8bbfd8933e
planemo upload commit cdc8b7437c69c33b17b1b100445db0d4d6e8ddde
caleb-easterly
parents:
6
diff
changeset
|
8 java -jar '$__tool_directory__/validate_fasta_database-0.2.jar' |
6
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
9 '$inFasta' |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
10 '$goodFastaOut' |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
11 '$badFastaOut' |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
12 '$crashIfInvalid' |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
13 '$checkIsProtein' |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
14 '$customLetters' |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
15 '$checkHasAccession' |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
16 '$minimumLength' |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
17 ]]></command> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
18 <inputs> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
19 <param type="data" name="inFasta" format="fasta" label="Select input FASTA dataset"/> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
20 <param type="boolean" name="crashIfInvalid" |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
21 label="Fail job if invalid FASTA headers detected?" |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
22 value="false"/> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
23 <param type="boolean" name="checkIsProtein" |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
24 label="Ensure that sequence is a valid amino acid sequence?" |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
25 help="Checks that sequence only contains the 20 standard amino |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
26 acids (and optional non-standard AAs), and checks that it is not DNA or RNA" |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
27 value="true"/> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
28 <param type="text" name="customLetters" value="" |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
29 label="Optional: add one-letter codes for any non-standard amino acids that you are using." |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
30 help="Anything that is not an upper case letter [A-Z] will be ignored."/> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
31 <param type="boolean" name="checkHasAccession" |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
32 label="Only pass sequences with accession numbers?" |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
33 value="false"/> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
34 <param type="integer" name="minimumLength" |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
35 label="Minimum length for sequences to pass" |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
36 value="0"/> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
37 |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
38 |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
39 <!--<conditional name="checkLength">--> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
40 <!--<param type="boolean" name="checkLength" label="Filter out sequences below a minimum sequenceLength?">--> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
41 <!--<option value="true"></option>--> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
42 <!--<option value="false"></option>--> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
43 <!--</param>--> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
44 <!--<when value="true">--> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
45 <!--<param name="minimumLength" type="integer" value="0" label="Minimum sequenceLength that AA sequence must have"/>--> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
46 <!--</when>--> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
47 <!--<when value="false">--> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
48 <!--</when>--> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
49 <!--</conditional>--> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
50 |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
51 </inputs> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
52 <outputs> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
53 <data name="goodFastaOut" format="fasta" label="Validate FASTA: Passed Sequences"/> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
54 <data name="badFastaOut" format="fasta" label="Validate FASTA: Failed Sequences"/> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
55 </outputs> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
56 <tests> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
57 <!-- test general filtering --> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
58 <test> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
59 <param name="inFasta" value="fastaFilteringTest_IN.fasta"/> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
60 <output name="goodFastaOut" file="fastaFilteringTest_OUT1.fasta" /> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
61 <output name="badFastaOut" file="fastaFilteringTest_OUT2.fasta" /> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
62 </test> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
63 |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
64 <!--test filtering out genetic sequences and bad protein sequences--> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
65 <test> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
66 <param name="inFasta" value="geneticFiltering.in"/> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
67 <param name="checkIsProtein" value="true"/> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
68 <output name="goodFastaOut" file="geneticFilteringGood.out"/> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
69 <output name="badFastaOut" file="geneticFilteringBad.out"/> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
70 </test> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
71 |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
72 <test> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
73 <param name="inFasta" value="length5Filtering.in"/> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
74 <param name="minimumLength" value="5"/> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
75 <output name="goodFastaOut" file="length5FilteringGood.out"/> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
76 <output name="badFastaOut" file="length5FilteringBad.out"/> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
77 </test> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
78 </tests> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
79 <help> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
80 |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
81 <![CDATA[ |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
82 **Notes** |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
83 |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
84 Takes a FASTA database and validates the headers using the schema from Compomics (the developers of SearchGUI and PeptideShaker). |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
85 Custom FASTA databases may be in an invalid format, which causes SearchGUI to crash. |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
86 |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
87 **Output** |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
88 |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
89 The main output of this tool, "Validate FASTA: Passed Sequences", is a FASTA database that can be run through SearchGUI without error. |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
90 The failed sequences may be examined for typos and other errors. |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
91 |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
92 In addition, the tool will print the databases assigned by the Compomics utility (e.g., UniProt), for a quick check of the validity of the custom FASTA database. |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
93 |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
94 Sequences that may cause the tool to report an exception are those that are not valid examples of the following formats: |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
95 * UniProt |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
96 * SwissProt (starts with ">sw|" or ">SW|") |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
97 * NCBI (starts with ">gi|" or ">GI|") |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
98 * Halobacterium from Max Planck (starts with "OE") |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
99 * H Influenza, from Novartis (starts with ">hflu_") |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
100 * C Trachomatis (starts with ">C.tr\_" or "C\_trachomatis\_") |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
101 * M Tuberculosis (starts with ">M. tub") |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
102 * Saccharomyces Genome Database (contains "SGDID") |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
103 * Genome translation (ex. ">dm345\_3L-sense [2343534-234353938]") |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
104 * Genome Annotation Framework for Flexible Analysis (GAFFA) (starts with ">GAFFA") |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
105 * UPS (contains "\_HUMAN\_UPS") |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
106 |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
107 Many sequences are reported as Generic, which may or may not allow for extraction of the accession number. |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
108 ]]> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
109 </help> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
110 <citations> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
111 <citation type="bibtex"> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
112 @misc{fastaValidationTool, |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
113 author = {The GalaxyP Team}, |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
114 date = {22 June 2017}, |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
115 title = {FASTA Database Validation Tool} |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
116 } |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
117 </citation> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
118 </citations> |
bad73d1d7345
planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
caleb-easterly
parents:
diff
changeset
|
119 </tool> |