Mercurial > repos > caleb-easterly > validate_fasta_database
changeset 2:d61a95fe20e4 draft
planemo upload commit 833e8a1d5ef37cbd4cadad6c90a51b268871627b-dirty
author | caleb-easterly |
---|---|
date | Fri, 23 Jun 2017 10:58:49 -0400 |
parents | 0f08a4a0dd15 |
children | d45b2b8177a1 |
files | validateFASTA.xml validate_fasta_headers.xml |
diffstat | 2 files changed, 65 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
--- a/validateFASTA.xml Fri Jun 23 10:49:25 2017 -0400 +++ b/validateFASTA.xml Fri Jun 23 10:58:49 2017 -0400 @@ -1,4 +1,4 @@ -<tool id="validateFASTA" name="Check FASTA Headers" version="0.1.0"> +<tool id="validate_fasta_database" name="Validate FASTA Headers" version="0.1.1"> <requirements> </requirements> <stdio>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/validate_fasta_headers.xml Fri Jun 23 10:58:49 2017 -0400
@@ -0,0 +1,72 @@
+<tool id="validate_fasta_database" name="Validate FASTA Headers" version="0.1.0">
+    <!-- Splits a FASTA database into sequences whose headers pass or fail validation by the bundled FastaHeader jar. -->
+    <!-- NOTE(review): the command below invokes `java`, but no runtime is declared here; confirm how Java is provisioned. -->
+    <requirements>
+    </requirements>
+    <stdio>
+        <!-- Exit code 1 gets a dedicated, user-facing error message. -->
+        <exit_code range="1" level="fatal" description="Invalid FASTA headers detected, was asked to fail"/>
+    </stdio>
+    <!-- Arguments, in order: input FASTA, passed-sequences output, failed-sequences output, fail-on-invalid flag.
+         Paths are single-quoted so that directories containing spaces do not split the command line. -->
+    <command detect_errors="exit_code"><![CDATA[
+        java -jar '$__tool_directory__/FastaHeader-1.0-SNAPSHOT-jar-with-dependencies.jar' '$FASTA' '$goodFasta' '$badFasta' '$crashIfInvalid'
+    ]]></command>
+    <inputs>
+        <param type="data" name="FASTA" format="fasta" label="Select input FASTA dataset"/>
+        <!-- Rendered into the command line as the literal string "true" or "false"; unchecked by default. -->
+        <param type="boolean" name="crashIfInvalid" truevalue="true" falsevalue="false" checked="false" label="Fail job if invalid FASTA headers detected?"/>
+    </inputs>
+    <outputs>
+        <data name="goodFasta" format="fasta" label="Validate FASTA: Passed Sequences"/>
+        <data name="badFasta" format="fasta" label="Validate FASTA: Failed Sequences"/>
+    </outputs>
+    <tests>
+        <!-- Filtering run: the job must not fail on invalid headers, so both outputs are produced and compared. -->
+        <test>
+            <param name="FASTA" value="fastaFilteringTest_IN.fasta"/>
+            <param name="crashIfInvalid" value="false"/>
+            <output name="goodFasta" file="fastaFilteringTest_OUT1.fasta"/>
+            <output name="badFasta" file="fastaFilteringTest_OUT2.fasta"/>
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+**Notes**
+
+Takes a FASTA database and validates the headers using the Compomics (developers of SearchGUI and PeptideShaker) schema.
+Custom FASTA databases may be in an invalid format, which causes SearchGUI to crash.
+
+**Output**
+
+The main output of this tool, "Validate FASTA: Passed Sequences", is a FASTA database that can be run through SearchGUI without error.
+The failed sequences may be examined for typos and other errors.
+
+In addition, the tool will print the databases assigned by the Compomics utility (i.e., UniProt), for a quick check of the validity of the custom FASTA database.
+
+Sequences that may cause the tool to report an exception are those that are not valid examples of the following formats:
+
+* UniProt
+* SwissProt (starts with ">sw|" or ">SW|")
+* NCBI (starts with ">gi|" or ">GI|")
+* Halobacterium from Max Planck (starts with ">OE")
+* H Influenza, from Novartis (starts with ">hflu\_")
+* C Trachomatis (starts with ">C.tr\_" or ">C\_trachomatis\_")
+* M Tuberculosis (starts with ">M. tub")
+* Saccharomyces Genome Database (contains "SGDID")
+* Genome translation (ex. ">dm345\_3L-sense [2343534-234353938]")
+* Genome Annotation Framework for Flexible Analysis (GAFFA) (starts with ">GAFFA")
+* UPS (contains "\_HUMAN\_UPS")
+
+Many sequences are reported as Generic, which may or may not allow for extraction of the accession number.
+]]>
+    </help>
+    <citations>
+        <citation type="bibtex">
+            @misc{fastaValidation,
+                author = {The GalaxyP Team},
+                year = {2017},
+                date = {22 June 2017},
+                title = {FASTA Database Validation Tool}
+            }
+        </citation>
+    </citations>
+</tool>