annotate validateFASTA.xml @ 2:d61a95fe20e4 draft

planemo upload commit 833e8a1d5ef37cbd4cadad6c90a51b268871627b-dirty
author caleb-easterly
date Fri, 23 Jun 2017 10:58:49 -0400
parents daf36a052a01
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
d61a95fe20e4 planemo upload commit 833e8a1d5ef37cbd4cadad6c90a51b268871627b-dirty
caleb-easterly
parents: 0
diff changeset
1 <tool id="validate_fasta_database" name="Validate FASTA Headers" version="0.1.1">
0
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
2 <requirements>
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
3 </requirements>
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
4 <stdio>
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
5 <exit_code range="1" level="fatal" description="Invalid FASTA headers detected, was asked to fail"/>
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
6 </stdio>
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
7 <command detect_errors="exit_code"><![CDATA[
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
8 java -jar '$__tool_directory__/FastaHeader-1.0-SNAPSHOT-jar-with-dependencies.jar' '$FASTA' '$goodFasta' '$badFasta' '$crashIfInvalid'
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
9 ]]></command>
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
10 <inputs>
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
11 <param type="data" name="FASTA" format="fasta" label="Select input FASTA dataset"/>
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
12 <param type="boolean" name="crashIfInvalid" checked="false" truevalue="true" falsevalue="false" label="Fail job if invalid FASTA headers detected?"/>
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
13 </inputs>
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
14 <outputs>
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
15 <data name="goodFasta" format="fasta" label="Validate FASTA: Passed Sequences"/>
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
16 <data name="badFasta" format="fasta" label="Validate FASTA: Failed Sequences"/>
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
17 </outputs>
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
18 <tests>
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
19 <test>
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
20 <param name="FASTA" value="fastaFilteringTest_IN.fasta"/>
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
21 <output name="goodFasta" file="fastaFilteringTest_OUT1.fasta" />
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
22 <output name="badFasta" file="fastaFilteringTest_OUT2.fasta" />
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
23 </test>
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
24 </tests>
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
25 <help>
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
26 <![CDATA[
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
27 **Notes**
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
28
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
29 Takes a FASTA database and validates the headers using the Compomics (developers of SearchGUI and PeptideShaker) schema.
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
30 Custom FASTA databases may be in an invalid format, which causes SearchGUI to crash.
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
31
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
32 **Output**
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
33
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
34 The main output of this tool, "Validate FASTA: Passed Sequences", is a FASTA database that can be run through SearchGUI without error.
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
35 The failed sequences may be examined for typos and other errors.
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
36
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
37 In addition, the tool will print the databases assigned by the Compomics utility (e.g., UniProt), for a quick check of the validity of the custom FASTA database.
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
38
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
39 Sequences that may cause the tool to report an exception are those that are not valid examples of the following formats:
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
40 * UniProt
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
41 * SwissProt (starts with ">sw|" or ">SW|")
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
42 * NCBI (starts with ">gi|" or ">GI|")
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
43 * Halobacterium from Max Planck (starts with "OE")
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
44 * H Influenza, from Novartis (starts with ">hflu\_")
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
45 * C Trachomatis (starts with ">C.tr\_" or "C\_trachomatis\_")
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
46 * M Tuberculosis (starts with ">M. tub")
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
47 * Saccharomyces Genome Database (contains "SGDID")
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
48 * Genome translation (ex. ">dm345\_3L-sense [2343534-234353938]")
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
49 * Genome Annotation Framework for Flexible Analysis (GAFFA) (starts with ">GAFFA")
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
50 * UPS (contains "\_HUMAN\_UPS")
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
51
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
52 Many sequences are reported as Generic, which may or may not allow for extraction of the accession number.
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
53 ]]>
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
54 </help>
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
55 <citations>
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
56 <citation type="bibtex">
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
57 @misc{fastaValidation,
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
58 author = {The GalaxyP Team},
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
59 date = {22 June 2017},
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
60 title = {FASTA Database Validation Tool}
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
61 }
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
62 </citation>
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
63 </citations>
daf36a052a01 planemo upload commit 486a143038c57c7a2368c66a55877cda12f694ed-dirty
caleb-easterly
parents:
diff changeset
64 </tool>