Mercurial > repos > caleb-easterly > validate_fasta_database
changeset 4:e5a59fabeeba draft
planemo upload commit e412b2a32af63a1a6dfcfcda5ad66ada0e1cd954-dirty
author | caleb-easterly |
---|---|
date | Wed, 28 Jun 2017 13:34:56 -0400 |
parents | d45b2b8177a1 |
children | d4bd627618e5 |
files | FastaHeader-1.0-SNAPSHOT-jar-with-dependencies.jar FastaHeader-1.0-SNAPSHOT.jar validateFASTA.xml validate_fasta_headers.xml |
diffstat | 4 files changed, 20 insertions(+), 67 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/validateFASTA.xml Wed Jun 28 11:55:46 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,64 +0,0 @@
-<tool id="validate_fasta_database" name="Validate FASTA Headers" version="0.1.1">
- <requirements>
- </requirements>
- <stdio>
- <exit_code range="1" level="fatal" description="Invalid FASTA headers detected, was asked to fail"/>
- </stdio>
- <command detect_errors="exit_code"><![CDATA[
- java -jar $__tool_directory__/FastaHeader-1.0-SNAPSHOT-jar-with-dependencies.jar "$FASTA" "$goodFasta" "$badFasta" "$crashIfInvalid"
- ]]></command>
- <inputs>
- <param type="data" name="FASTA" format="fasta" label="Select input FASTA dataset"/>
- <param type="boolean" name="crashIfInvalid" label="Fail job if invalid FASTA headers detected?"/>
- </inputs>
- <outputs>
- <data name="goodFasta" format="fasta" label="Validate FASTA: Passed Sequences"/>
- <data name="badFasta" format="fasta" label="Validate FASTA: Failed Sequences"/>
- </outputs>
- <tests>
- <test>
- <param name="FASTA" value="fastaFilteringTest_IN.fasta"/>
- <output name="goodFasta" file="fastaFilteringTest_OUT1.fasta" />
- <output name="badFasta" file="fastaFilteringTest_OUT2.fasta" />
- </test>
- </tests>
- <help>
-<![CDATA[
-**Notes**
-
-Takes a FASTA database and validates the headers using the Compomics (developers of SearchGUI and PeptideShaker) schema.
-Custom FASTA databases may be in an invalid format, which causes SearchGUI to crash.
-
-**Output**
-
-The main output of this tool, "Validate FASTA: Passed Sequences", is a FASTA database that can be run through SearchGUI without error.
-The failed sequences may be examined for typos and other errors.
-
-In addition, the tool will print the databases assigned by the Compomics utility (i.e., UniProt), for a quick check of the validity of the custom FASTA database.
-
-Sequences that may cause the tool to report an exception are those that are not valid examples of the following formats:
- * UniProt,
- * SwissProt (starts with ">sw|" or ">SW|")
- * NCBI (starts with ">gi|" or ">GI|")
- * Halobacterium from Max Planck (starts with "OE")
- * H Influenza, from Novartis (starts with ">hflu_")
- * C Trachomatis (starts with ">C.tr\_" or "C\_trachomatis\_")
- * M Tuberculosis (starts with ">M. tub")
- * Saccharomyces Genome Database (contains "SGDID")
- * Genome translation (ex. ">dm345\_3L-sense [2343534-234353938]")
- * Genome Annotation Framework for Flexible Analysis (GAFFA) (starts with ">GAFFA")
- * UPS (contains "\_HUMAN\_UPS")
-
-Many sequences are reported as Generic, which may or may not allow for extraction of the accession number.
-]]>
- </help>
- <citations>
- <citation type="bibtex">
- @misc{fastaValidation,
- author = {The GalaxyP Team},
- date = {22 June 2017},
- title = {FASTA Database Validation Tool}
- }
- </citation>
- </citations>
-</tool>
```
```diff
--- a/validate_fasta_headers.xml Wed Jun 28 11:55:46 2017 -0400
+++ b/validate_fasta_headers.xml Wed Jun 28 13:34:56 2017 -0400
@@ -1,15 +1,32 @@
-<tool id="validate_fasta_database" name="Validate FASTA Headers" version="0.1.0">
+<tool id="validate_fasta_database" name="Validate FASTA Headers" version="0.1.1">
  <requirements>
  </requirements>
  <stdio>
  <exit_code range="1" level="fatal" description="Invalid FASTA headers detected, was asked to fail"/>
  </stdio>
  <command detect_errors="exit_code"><![CDATA[
- java -jar $__tool_directory__/FastaHeader-1.0-SNAPSHOT-jar-with-dependencies.jar "$FASTA" "$goodFasta" "$badFasta" "$crashIfInvalid"
+ java -jar $__tool_directory__/FastaHeader-1.0-SNAPSHOT.jar '$FASTA' '$goodFasta' '$badFasta' '$crashIfInvalid'
+ '$checkIsProtein' $checkLength.checkLength
+ #if $checkLength.checkLength
+ $checkLength.minimumLength
+ #end if
  ]]></command>
  <inputs>
  <param type="data" name="FASTA" format="fasta" label="Select input FASTA dataset"/>
- <param type="boolean" name="crashIfInvalid" label="Fail job if invalid FASTA headers detected?"/>
+ <param type="boolean" name="crashIfInvalid" label="Fail job if invalid FASTA headers detected?"/>
+ <param type="boolean" name="checkIsProtein" label="Ensure that sequence is not DNA or RNA?"/>
+ <conditional name="checkLength">
+ <param type="boolean" name="checkLength" label="Filter out sequences below a minimum length?">
+ <option value="true"></option>
+ <option value="false"></option>
+ </param>
+ <when value="true">
+ <param name="minimumLength" type="integer" value="0" label="Minimum length that AA sequence must have"/>
+ </when>
+ <when value="false">
+ </when>
+ </conditional>
+
  </inputs>
  <outputs>
  <data name="goodFasta" format="fasta" label="Validate FASTA: Passed Sequences"/>
```