diff dbbuilder.xml @ 5:1d3557cec567 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dbbuilder commit da321d452c314c8c66ea848fe1fe20879c03b0e4
author galaxyp
date Sat, 29 Apr 2017 09:46:31 -0400
parents
children 19ee2e85931a
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dbbuilder.xml	Sat Apr 29 09:46:31 2017 -0400
@@ -0,0 +1,109 @@
+<tool id="dbbuilder" name="Protein Database Downloader" version="0.2.1">
+    <description></description>
+    <requirements>
+        <requirement type="package">gnu-wget</requirement>
+    </requirements>
+    <stdio>
+        <exit_code range="1:"  level="fatal" description="Error downloading database." />
+        <regex match="ERROR" level="fatal" source="stderr" description="Error downloading database." />
+    </stdio>
+      <!-- TODO: escape quotes. -->
+        <!-- Add NCBI and maxquant contaminants. -->
+        <!-- http://maxquant.org/contaminants.zip -->
+        <!-- ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/mRNA_Prot/human.protein.faa.gz-->
+    <command>
+<![CDATA[
+        #if $source.from == "uniprot"
+            #set $url = "http://www.uniprot.org/uniprot/?query=taxonomy%3a%22" + str($source.taxon) + "%22" + str($source.set) + str($source.reviewed) + "&amp;force=yes&amp;format=fasta" + str($source.include_isoform)
+            #set $type = "direct"
+        #elif $source.from == "cRAP"
+            ##set $url = "ftp://ftp.thegpm.org/fasta/cRAP/crap.fasta"
+            #set $url = "https://raw.githubusercontent.com/pravs3683/cRAP/master/cRAP_protein_database.fasta"
+            #set $type = "direct"
+        #elif $source.from == "url"
+            #set $url = $source.url
+            #set $type = "direct"
+        #end if
+        #if $type =="direct"
+            wget -nv '$url' -O '${output_database}'
+        #end if
+]]>
+    </command>
+    <inputs>
+        <conditional name="source">
+            <param name="from" type="select" label="Download from" help="Select database source. cRAP acts as a database for common MS contaminants. UniProtKB is a corss species collection of functional protein databases">
+                <option value="uniprot">UniProtKB</option>
+                <option value="cRAP">cRAP (contaminants)</option>
+                <option value="url">Custom URL</option>
+            </param>
+            <when value="uniprot">
+                <param name="taxon" type="select" format="text" help="select species for protein database">
+                    <label>Taxonomy</label>
+                    <options from_file="uniprot_taxons.loc">
+                        <column name="name" index="0" />
+                        <column name="value" index="1" />
+                    </options>
+                </param>
+                <param name="reviewed" type="select" help="UniProtKB/TrEMBL (unreviewed)is a large, automatically annotated database- may contain redundant sequences, but there is a higher chance peptides will be identified. UniProtKB/Swiss-Prot (reviewed) is a smaller, manually annotated database- less of a chance peptides will be identified but less sequence redundancy">
+                    <option value="+">UniProtKB</option>
+                    <option value="+reviewed%3Ayes">UniProtKB/Swiss-Prot (reviewed only)</option>
+                    <option value="+reviewed%3Ano">UniProtKB/TrEMBL (unreviewed only)</option>
+                    <sanitizer>
+                        <valid>
+                            <add value="%"/>
+                        </valid>
+                    </sanitizer>
+                </param>
+                <param name="set" type="select" label="Proteome Set">
+                    <option value="+">Any</option>
+                    <option value="+keyword%3a1185" selected="true">Reference Proteome Set</option>
+                    <option value="+keyword%3a181">Complete Proteome Set</option>
+                    <sanitizer>
+                        <valid>
+                            <add value="%"/>
+                        </valid>
+                    </sanitizer>
+                </param>
+                <param name="include_isoform" type="boolean" truevalue="&amp;include=yes" falsevalue="" label="Include isoform data" help="several different forms of a given protein are incorporated into database" />
+            </when>
+            <when value="cRAP" />
+            <when value="url">
+                <param name="url" value="" type="text" label="URL (http, ftp)">
+                    <sanitizer>
+                        <valid>
+                            <add value="%"/>
+                        </valid>
+                    </sanitizer>
+                </param>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data format="fasta" name="output_database" label="Protein Database" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="from" value="cRAP" />
+            <output name="output_database">
+                <assert_contents>
+                    <has_text text="KKA1_ECOLX" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+**Output**
+
+Creates a FASTA file of specified protein sequences for comparison with experimental MS/MS data in search algorithm. 
+
+**External Links**
+
+_Galaxy-P 101 shows usage Protein Database Downloader tool in the creation of a workflow
+.. _Galaxy-P 101: http://msi-galaxy-p.readthedocs.org/en/latest/sections/galaxyp_101.html
+_UniProtKB provides additional information about the UniProt Knowledgebase
+.. _UniProtKB: http://www.uniprot.org/help/uniprotkb
+]]>
+    </help>
+</tool>
+