changeset 2:d61a95fe20e4 draft

planemo upload commit 833e8a1d5ef37cbd4cadad6c90a51b268871627b-dirty
author caleb-easterly
date Fri, 23 Jun 2017 10:58:49 -0400
parents 0f08a4a0dd15
children d45b2b8177a1
files validateFASTA.xml validate_fasta_headers.xml
diffstat 2 files changed, 65 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/validateFASTA.xml	Fri Jun 23 10:49:25 2017 -0400
+++ b/validateFASTA.xml	Fri Jun 23 10:58:49 2017 -0400
@@ -1,4 +1,4 @@
-<tool id="validateFASTA" name="Check FASTA Headers" version="0.1.0">
+<tool id="validate_fasta_database" name="Validate FASTA Headers" version="0.1.1">
     <requirements>
     </requirements>
     <stdio>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/validate_fasta_headers.xml	Fri Jun 23 10:58:49 2017 -0400
@@ -0,0 +1,64 @@
+<tool id="validate_fasta_database" name="Validate FASTA Headers" version="0.1.0">
+    <requirements> <!-- NOTE(review): no dependencies are declared although the command invokes java; confirm the runtime is expected to come from the host. -->
+    </requirements>
+    <stdio> <!-- Declares process exit status 1 as a fatal error. -->
+        <exit_code level="fatal" range="1" description="Invalid FASTA headers detected, was asked to fail" />
+    </stdio>
+    <command detect_errors="exit_code"><![CDATA[
+        java -jar "$__tool_directory__/FastaHeader-1.0-SNAPSHOT-jar-with-dependencies.jar" "$FASTA" "$goodFasta" "$badFasta" "$crashIfInvalid"
+    ]]></command> <!-- Runs the bundled jar with, in order: input FASTA, passed-sequences output, failed-sequences output, fail-on-invalid flag. The jar path is quoted so an install directory containing spaces stays a single argument. -->
+    <inputs> <!-- One FASTA dataset plus a boolean switch asking the job to fail when invalid headers are found. -->
+        <param name="FASTA" type="data" format="fasta" label="Select input FASTA dataset" />
+        <param name="crashIfInvalid" type="boolean" label="Fail job if invalid FASTA headers detected?" />
+    </inputs>
+    <outputs> <!-- Two FASTA datasets: sequences that passed and sequences that failed validation. -->
+        <data name="goodFasta" label="Validate FASTA: Passed Sequences" format="fasta" />
+        <data name="badFasta" label="Validate FASTA: Failed Sequences" format="fasta" />
+    </outputs>
+    <tests> <!-- Single case: one input FASTA compared against the expected passed and failed outputs. -->
+        <test>
+            <param name="FASTA" value="fastaFilteringTest_IN.fasta" />
+            <output name="goodFasta" file="fastaFilteringTest_OUT1.fasta" />
+            <output name="badFasta" file="fastaFilteringTest_OUT2.fasta" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+**Notes**
+
+Takes a FASTA database and validates its headers using the schema from Compomics (the developers of SearchGUI and PeptideShaker).
+Custom FASTA databases may be in an invalid format, which causes SearchGUI to crash.
+        
+**Output**
+
+The main output of this tool, "Validate FASTA: Passed Sequences", is a FASTA database that can be run through SearchGUI without error.
+The failed sequences may be examined for typos and other errors. 
+
+In addition, the tool will print the databases assigned by the Compomics utility (e.g., UniProt), for a quick check of the validity of the custom FASTA database.
+
+Sequences that may cause the tool to report an exception are those that are not valid examples of the following formats:
+    * UniProt
+    * SwissProt (starts with ">sw|" or ">SW|")
+    * NCBI (starts with ">gi|" or ">GI|")
+    * Halobacterium from Max Planck (starts with "OE")
+    * H Influenza, from Novartis (starts with ">hflu_")
+    * C Trachomatis (starts with ">C.tr\_" or "C\_trachomatis\_")
+    * M Tuberculosis (starts with ">M. tub")
+    * Saccharomyces Genome Database (contains "SGDID")
+    * Genome translation (ex. ">dm345\_3L-sense [2343534-234353938]")
+    * Genome Annotation Framework for Flexible Analysis (GAFFA) (starts with ">GAFFA")
+    * UPS (contains "\_HUMAN\_UPS")
+           
+Many sequences are reported as Generic, which may or may not allow for extraction of the accession number. 
+]]>
+    </help>
+    <citations> <!-- Cites the tool itself through a single BibTeX @misc entry. -->
+        <citation type="bibtex">
+            @misc{fastaValidation,
+            author = {The GalaxyP Team},
+            date = {22 June 2017},
+            title = {FASTA Database Validation Tool}
+            }
+        </citation>
+    </citations>
+</tool>