Repository 'dbbuilder'
hg clone https://toolshed.g2.bx.psu.edu/repos/galaxyp/dbbuilder

Changeset 8:a85fbebe8b2f (2017-05-03)
Previous changeset 7:a94dc3d469cb (2017-05-02) Next changeset 9:c1b437242fee (2020-09-13)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dbbuilder commit baa8095b41984515948c6ef2fda7c7e61c945f94
modified:
dbbuilder.xml
b
diff -r a94dc3d469cb -r a85fbebe8b2f dbbuilder.xml
--- a/dbbuilder.xml Tue May 02 08:52:01 2017 -0400
+++ b/dbbuilder.xml Wed May 03 09:53:17 2017 -0400
[
b'@@ -1,4 +1,4 @@\n-<tool id="dbbuilder" name="Protein Database Downloader" version="0.2.1">\n+<tool id="dbbuilder" name="Protein Database Downloader" version="0.3.0">\n     <description></description>\n     <requirements>\n         <requirement type="package">gnu-wget</requirement>\n@@ -20,20 +20,46 @@\n             ##set $url = "ftp://ftp.thegpm.org/fasta/cRAP/crap.fasta"\n             #set $url = "https://raw.githubusercontent.com/pravs3683/cRAP/master/cRAP_protein_database.fasta"\n             #set $type = "direct"\n+        #elif $source.from == "HMP"\n+            #set $url = \'http://downloads.hmpdacc.org/data/reference_genomes/body_sites/\' + str($source.site) + \'.pep.fsa\'\n+            #set $type = "direct"\n+        #elif $source.from == "HOMD"\n+            #set $url = \'ftp://ftp.homd.org/human_oral_microbial_genomic_sequences/current/\' + str($source.annotation)\n+            #if str($source.annotation).endswith(\'.tar.gz\'):\n+                #set $type = "tgz" \n+            #elif str($source.annotation).endswith(\'.zip\'):\n+                #set $type = "zip" \n+            #end if \n+        #elif $source.from == \'EBI Metagenomics\'\n+            #set $url = \'https://www.ebi.ac.uk/metagenomics/projects/\' + str($source.ebi_project) + \'/samples/\' + str($source.ebi_sample) + \'/runs/\' + str($source.ebi_run) + \'/results/versions/\' + str($source.ebi_version) + \'/sequences/\' + str($source.ebi_annotation) + \'/chunks/1\'\n+            #set $type = "gzip"\n         #elif $source.from == "url"\n             #set $url = $source.url\n-            #set $type = "direct"\n+            #set $type = $source.archive_type\n         #end if\n         #if $type =="direct"\n-            wget -nv \'$url\' -O \'${output_database}\'\n+            wget -nv \'$url\' -O \'${output_database}\' --no-check-certificate\n+        #elif $type =="zip"\n+            wget -nv \'$url\' -O tmp.zip --no-check-certificate && zcat -c tmp.zip > \'${output_database}\'\n+        #elif $type =="gzip"\n+            wget -nv \'$url\' -O tmp.gz --no-check-certificate && (if `command -v gzcat > /dev/null`;  then gzcat tmp.gz; else zcat tmp.gz ; fi) > \'${output_database}\'\n+        #elif $type =="bzip2"\n+            wget -nv \'$url\' -O tmp.bz2 --no-check-certificate && bzcat tmp.bz2 > \'${output_database}\'\n+        #elif $type =="tgz"\n+            wget -nv \'$url\' -O tmp.tar.gz && tar zxfO tmp.tar.gz > \'${output_database}\'\n+        #elif $type =="tbz"\n+            wget -nv \'$url\' -O tmp.tar.bz && tar jxfO tmp.tar.bz > \'${output_database}\'\n         #end if\n ]]>\n     </command>\n     <inputs>\n         <conditional name="source">\n-            <param name="from" type="select" label="Download from" help="Select database source. cRAP acts as a database for common MS contaminants. UniProtKB is a corss species collection of functional protein databases">\n+            <param name="from" type="select" label="Download from" help="Select database source. cRAP acts as a database for common MS contaminants. UniProtKB is a cross species collection of functional protein databases">\n                 <option value="uniprot">UniProtKB</option>\n                 <option value="cRAP">cRAP (contaminants)</option>\n+                <option value="HMP">Human Microbiome Project body sites</option>\n+                <option value="EBI Metagenomics">EBI Metagenomics</option>\n+                <option value="HOMD">Human Oral Microbiome Database (HOMD)</option>\n                 <option value="url">Custom URL</option>\n             </param>\n             <when value="uniprot">\n@@ -67,19 +93,60 @@\n                 <param name="include_isoform" type="boolean" truevalue="&amp;include=yes" falsevalue="" label="Include isoform data" help="several different forms of a given protein are incorporated into database" />\n             </when>\n             <when value="cRAP" />\n+            <when value="HMP">\n+                <param name="site" type="select" label="Proteome for body site">\n+                    <option value="Airways">HMP airways</option>\n+                '..b'"url">\n-                <param name="url" value="" type="text" label="URL (http, ftp)">\n+                <param name="url" value="" type="text" label="URL (http, ftp) of Fasta sequences">\n                     <sanitizer>\n                         <valid>\n                             <add value="%"/>\n+                            <add value="~"/>\n                         </valid>\n                     </sanitizer>\n                 </param>\n+                <param name="archive_type" type="select" label="Fasta source compression type">\n+                    <option value="direct" selected="true">fasta file (uncompressed)</option>\n+                    <option value="gzip">fasta.gz (gzip compressed)</option>\n+                    <option value="bzip2">fasta.bz2 (bzip2 compressed)</option>\n+                    <option value="zip">fasta.zip or fasta.Z (Zip compressed)</option>\n+                    <option value="tgz">fasta.tgz or fasta.tar.gz (tar archive gzip compressed)</option>\n+                    <option value="tbz">fasta.tbz or fasta.tar.bz (tar archive bzip2 compressed)</option>\n+                </param>\n             </when>\n         </conditional>\n     </inputs>\n     <outputs>\n-        <data format="fasta" name="output_database" label="Protein Database" />\n+        <data format="fasta" name="output_database" label="Protein Database ${source.from}" />\n     </outputs>\n     <tests>\n         <test>\n@@ -90,6 +157,28 @@\n                 </assert_contents>\n             </output>\n         </test>\n+        <test>\n+            <param name="from" value="url" />\n+            <param name="url" value="https://raw.githubusercontent.com/pravs3683/cRAP/master/cRAP_protein_database.fasta" />\n+            <param name="archive_type" value="direct" />\n+            <output name="output_database">\n+                <assert_contents>\n+                    <has_text text="KKA1_ECOLX" />\n+                </assert_contents>\n+            </output>\n+        </test>\n+        <test>\n+            <param name="from" value="EBI Metagenomics" />\n+            <param name="ebi_project" value="DRP003095" />\n+            <param name="ebi_sample" value="DRS029200" />\n+            <param name="ebi_run" value="DRR033743" />\n+            <param name="ebi_version" value="3.0" />\n+            <output name="output_database">\n+                <assert_contents>\n+                    <has_text text="DRR033743" />\n+                </assert_contents>\n+            </output>\n+        </test>\n     </tests>\n     <help>\n <![CDATA[\n@@ -99,11 +188,36 @@\n \n **External Links**\n \n-_Galaxy-P 101 shows usage Protein Database Downloader tool in the creation of a workflow\n-.. _Galaxy-P 101: http://msi-galaxy-p.readthedocs.org/en/latest/sections/galaxyp_101.html\n-_UniProtKB provides additional information about the UniProt Knowledgebase\n+  - Galaxy-P_101_ shows usage Protein Database Downloader tool in the creation of a workflow\n+  - UniProtKB_ provides additional information about the UniProt Knowledgebase\n+\n+\n+.. _Galaxy-P_101: http://msi-galaxy-p.readthedocs.org/en/latest/sections/galaxyp_101.html\n .. _UniProtKB: http://www.uniprot.org/help/uniprotkb\n+\n+\n+**Additional Protein Fasta URLs**\n+\n+  *HUMAN GUT METAPROTEOME:*\n+\n+    * 512MB gzip ftp://public.genomics.org.cn/BGI/gutmeta/UniSet/UniGene.pep.gz\n+    *  61MB gzip http://www.bork.embl.de/~arumugam/Qin_et_al_2010/frequent_microbe_proteins.fasta.gz\n+\n+\n+  *MOUSE GUT MICROBIOTA:*\n+\n+    * 417MB gzip ftp://climb.genomics.cn/pub/10.5524/100001_101000/100114/Genecatalog/184sample_2.6M.GeneSet.pep.gz\n+    * See: http://gigadb.org/dataset/view/id/100114/token/mZlMYJIF04LshpgP\n+\n+\n ]]>\n     </help>\n+    <citations>\n+        <citation type="doi">10.1093/nar/gkw1099</citation>\n+        <citation type="doi">10.1093/nar/gkv1195 </citation>\n+        <citation type="doi">10.1093/database/baq013</citation>\n+        <citation type="doi">10.1038/nature11209</citation>\n+        <citation type="doi">10.1038/nature11234</citation>\n+    </citations>\n </tool>\n \n'