changeset 0:45755942ae7b draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/bumbershoot/custom_pro_db commit 4bb5b663989d5f04e8fb74b111456f16d6edaa66
author galaxyp
date Tue, 14 Mar 2017 14:11:55 -0400
parents
children 9b4ee836e35b
files COPYING data_manager/customProDB_annotation.R data_manager/customProDB_annotation.xml data_manager_conf.xml tool-data/customProDB.loc.sample tool_data_table_conf.xml.sample
diffstat 6 files changed, 331 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/COPYING	Tue Mar 14 14:11:55 2017 -0400
@@ -0,0 +1,121 @@
+Creative Commons Legal Code
+
+CC0 1.0 Universal
+
+    CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
+    LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
+    ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
+    INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
+    REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
+    PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
+    THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
+    HEREUNDER.
+
+Statement of Purpose
+
+The laws of most jurisdictions throughout the world automatically confer
+exclusive Copyright and Related Rights (defined below) upon the creator
+and subsequent owner(s) (each and all, an "owner") of an original work of
+authorship and/or a database (each, a "Work").
+
+Certain owners wish to permanently relinquish those rights to a Work for
+the purpose of contributing to a commons of creative, cultural and
+scientific works ("Commons") that the public can reliably and without fear
+of later claims of infringement build upon, modify, incorporate in other
+works, reuse and redistribute as freely as possible in any form whatsoever
+and for any purposes, including without limitation commercial purposes.
+These owners may contribute to the Commons to promote the ideal of a free
+culture and the further production of creative, cultural and scientific
+works, or to gain reputation or greater distribution for their Work in
+part through the use and efforts of others.
+
+For these and/or other purposes and motivations, and without any
+expectation of additional consideration or compensation, the person
+associating CC0 with a Work (the "Affirmer"), to the extent that he or she
+is an owner of Copyright and Related Rights in the Work, voluntarily
+elects to apply CC0 to the Work and publicly distribute the Work under its
+terms, with knowledge of his or her Copyright and Related Rights in the
+Work and the meaning and intended legal effect of CC0 on those rights.
+
+1. Copyright and Related Rights. A Work made available under CC0 may be
+protected by copyright and related or neighboring rights ("Copyright and
+Related Rights"). Copyright and Related Rights include, but are not
+limited to, the following:
+
+  i. the right to reproduce, adapt, distribute, perform, display,
+     communicate, and translate a Work;
+ ii. moral rights retained by the original author(s) and/or performer(s);
+iii. publicity and privacy rights pertaining to a person's image or
+     likeness depicted in a Work;
+ iv. rights protecting against unfair competition in regards to a Work,
+     subject to the limitations in paragraph 4(a), below;
+  v. rights protecting the extraction, dissemination, use and reuse of data
+     in a Work;
+ vi. database rights (such as those arising under Directive 96/9/EC of the
+     European Parliament and of the Council of 11 March 1996 on the legal
+     protection of databases, and under any national implementation
+     thereof, including any amended or successor version of such
+     directive); and
+vii. other similar, equivalent or corresponding rights throughout the
+     world based on applicable law or treaty, and any national
+     implementations thereof.
+
+2. Waiver. To the greatest extent permitted by, but not in contravention
+of, applicable law, Affirmer hereby overtly, fully, permanently,
+irrevocably and unconditionally waives, abandons, and surrenders all of
+Affirmer's Copyright and Related Rights and associated claims and causes
+of action, whether now known or unknown (including existing as well as
+future claims and causes of action), in the Work (i) in all territories
+worldwide, (ii) for the maximum duration provided by applicable law or
+treaty (including future time extensions), (iii) in any current or future
+medium and for any number of copies, and (iv) for any purpose whatsoever,
+including without limitation commercial, advertising or promotional
+purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
+member of the public at large and to the detriment of Affirmer's heirs and
+successors, fully intending that such Waiver shall not be subject to
+revocation, rescission, cancellation, termination, or any other legal or
+equitable action to disrupt the quiet enjoyment of the Work by the public
+as contemplated by Affirmer's express Statement of Purpose.
+
+3. Public License Fallback. Should any part of the Waiver for any reason
+be judged legally invalid or ineffective under applicable law, then the
+Waiver shall be preserved to the maximum extent permitted taking into
+account Affirmer's express Statement of Purpose. In addition, to the
+extent the Waiver is so judged Affirmer hereby grants to each affected
+person a royalty-free, non transferable, non sublicensable, non exclusive,
+irrevocable and unconditional license to exercise Affirmer's Copyright and
+Related Rights in the Work (i) in all territories worldwide, (ii) for the
+maximum duration provided by applicable law or treaty (including future
+time extensions), (iii) in any current or future medium and for any number
+of copies, and (iv) for any purpose whatsoever, including without
+limitation commercial, advertising or promotional purposes (the
+"License"). The License shall be deemed effective as of the date CC0 was
+applied by Affirmer to the Work. Should any part of the License for any
+reason be judged legally invalid or ineffective under applicable law, such
+partial invalidity or ineffectiveness shall not invalidate the remainder
+of the License, and in such case Affirmer hereby affirms that he or she
+will not (i) exercise any of his or her remaining Copyright and Related
+Rights in the Work or (ii) assert any associated claims and causes of
+action with respect to the Work, in either case contrary to Affirmer's
+express Statement of Purpose.
+
+4. Limitations and Disclaimers.
+
+ a. No trademark or patent rights held by Affirmer are waived, abandoned,
+    surrendered, licensed or otherwise affected by this document.
+ b. Affirmer offers the Work as-is and makes no representations or
+    warranties of any kind concerning the Work, express, implied,
+    statutory or otherwise, including without limitation warranties of
+    title, merchantability, fitness for a particular purpose, non
+    infringement, or the absence of latent or other defects, accuracy, or
+    the present or absence of errors, whether or not discoverable, all to
+    the greatest extent permissible under applicable law.
+ c. Affirmer disclaims responsibility for clearing rights of other persons
+    that may apply to the Work or any use thereof, including without
+    limitation any person's Copyright and Related Rights in the Work.
+    Further, Affirmer disclaims responsibility for obtaining any necessary
+    consents, permissions or other rights required for any use of the
+    Work.
+ d. Affirmer understands and acknowledges that Creative Commons is not a
+    party to this document and has no duty or obligation with respect to
+    this CC0 or use of the Work.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/customProDB_annotation.R	Tue Mar 14 14:11:55 2017 -0400
@@ -0,0 +1,125 @@
+#!/usr/bin/env Rscript
+# Directory holding this script, taken from Rscript's "--file=" argument (not referenced again in this script).
+initial.options <- commandArgs(trailingOnly = FALSE)
+script_parent_dir <- dirname(sub("--file=", "", initial.options[grep("--file=", initial.options)]))
+
+## begin warning handler
+withCallingHandlers({
+
+library(methods) # Rscript does not always attach the methods package on its own
+
+options('useFancyQuotes' = FALSE)
+
+suppressPackageStartupMessages({
+    library("optparse")
+    library("RGalaxy")
+})
+# Command-line options; --dbsnp reaches the dbsnp_str argument through R's partial argument matching.
+option_list <- list(
+    dbkey = make_option('--dbkey', type='character'),
+    dbsnp = make_option('--dbsnp', type='character'),
+    cosmic = make_option('--cosmic', type='logical'),
+    outputFile = make_option('--outputFile', type='character'),
+    dbkey_description = make_option('--dbkey_description', type='character'))
+opt <- parse_args(OptionParser(option_list=option_list))
+
+
+customProDB_annotation <- function(
+	dbkey = GalaxyCharacterParam(required=TRUE),
+	dbsnp_str = GalaxyCharacterParam(required=FALSE),
+	cosmic = GalaxyLogicalParam(required=FALSE),
+	dbkey_description = GalaxyCharacterParam(required=FALSE),
+	outputFile = GalaxyOutput("output","json"))
+{
+    # Build customProDB RefSeq annotation files for `dbkey` and describe them in `outputFile`.
+    #   dbkey             - UCSC genome build (e.g. hg19); used in the UCSC queries and file names
+    #   dbsnp_str         - optional dbSNP table name; missing or empty means no dbSNP annotation
+    #   cosmic            - optional flag; TRUE requests COSMIC annotation (hg* builds only)
+    #   dbkey_description - optional display name for the data table entry; defaults to dbkey
+    #   outputFile        - JSON file: read for the target directory, then rewritten with the
+    #                       customProDB data table entry
+    if (!file.exists(outputFile))
+    {
+        gstop("json params file does not exist")
+    }
+
+    if (length(dbkey_description) < 1)
+    {
+        dbkey_description = dbkey
+    }
+
+    # An empty string (what the tool wrapper passes for a blank field) means "no dbSNP".
+    dbsnp = NULL
+    if (length(dbsnp_str) > 0 && nzchar(dbsnp_str[1]))
+    {
+        dbsnp = dbsnp_str
+    }
+
+    # Honour the flag's value rather than its mere presence, so "--cosmic false" disables it.
+    use_cosmic = length(cosmic) > 0 && isTRUE(as.logical(cosmic[1]))
+    if (use_cosmic && !grepl("^hg", dbkey))
+    {
+        gstop("COSMIC annotation requested but dbkey does not indicate a human genome (e.g. hg19)")
+    }
+
+    # The params JSON names the directory (extra_files_path) that receives the annotation files.
+    suppressPackageStartupMessages(library(rjson))
+    params = fromJSON(file=outputFile)
+    target_directory = params$output_data[[1]]$extra_files_path
+    dir.create(target_directory, recursive=TRUE)
+
+    # file.remove() reports failure through its return value (plus a warning), not an error.
+    if (!file.remove(outputFile))
+    {
+        gstop("failed to remove json params file after reading")
+    }
+
+    # UCSC Table Browser queries for the refGene coding and protein sequences of this build.
+    ucscTableCodingFastaURL = paste("http://genome.ucsc.edu/cgi-bin/hgTables?db=", dbkey, "&hgSeq.cdsExon=on&hgSeq.granularity=gene&hgSeq.casing=exon&hgSeq.repMasking=lower&hgta_doGenomicDna=get+sequence&hgta_group=genes&hgta_track=refGene&hgta_table=refGene&hgta_regionType=genome", sep="")
+    ucscTableProteinFastaURL = paste("http://genome.ucsc.edu/cgi-bin/hgTables?db=", dbkey, "&hgta_geneSeqType=protein&hgta_doGenePredSequence=submit&hgta_track=refGene&hgta_table=refGene", sep="")
+    codingFastaFilepath = paste(target_directory, "/", dbkey, ".cds.fa", sep="")
+    proteinFastaFilepath = paste(target_directory, "/", dbkey, ".protein.fa", sep="")
+
+    suppressPackageStartupMessages(library(customProDB))
+    options(timeout=3600) # allow up to one hour per download
+
+    cat(paste("Downloading coding FASTA from:", ucscTableCodingFastaURL, "\n"))
+    download.file(ucscTableCodingFastaURL, codingFastaFilepath, quiet=TRUE, mode='wb')
+
+    cat(paste("Downloading protein FASTA from:", ucscTableProteinFastaURL, "\n"))
+    download.file(ucscTableProteinFastaURL, proteinFastaFilepath, quiet=TRUE, mode='wb')
+
+    cat(paste("Preparing Refseq annotation files\n"))
+    customProDB::PrepareAnnotationRefseq(genome=dbkey, CDSfasta=codingFastaFilepath, pepfasta=proteinFastaFilepath, annotation_path=target_directory, dbsnp=dbsnp, COSMIC=use_cosmic)
+
+    # Rewrite outputFile with the data table entry describing the new annotation directory.
+    outputPath = paste(dbkey, "/customProDB", sep="")
+    output = list(data_tables = list())
+    output[["data_tables"]][["customProDB"]]=c(path=outputPath, name=dbkey_description, dbkey=dbkey, value=dbkey)
+    write(toJSON(output), file=outputFile)
+}
+
+
+# Forward every parsed option except optparse's automatic "help" entry.
+params <- opt[names(opt) != "help"]
+
+# NOTE(review): wrappedFunction is defined here but never called in this script.
+setClass("GalaxyRemoteError", contains="character")
+wrappedFunction <- function(f)
+{
+    # Call f with the parsed options, turning an error into a GalaxyRemoteError object.
+    run <- function() do.call(f, params)
+    on_error <- function(e) new("GalaxyRemoteError", conditionMessage(e))
+    tryCatch(run(), error=on_error)
+}
+
+suppressPackageStartupMessages(library(RGalaxy))
+
+# Run the annotation with the parsed options as named arguments.
+do.call(customProDB_annotation, params)
+
+## end warning handler
+}, warning = function(w) {
+    cat(paste("Warning:", conditionMessage(w), "\n"))
+    invokeRestart("muffleWarning")
+})
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/customProDB_annotation.xml	Tue Mar 14 14:11:55 2017 -0400
@@ -0,0 +1,40 @@
+<tool id="custom_pro_db_annotation_data_manager" name="CustomProDB Annotation" tool_type="manage_data" version="0.0.1">
+  <description>builder</description> <!-- data manager tool: builds customProDB annotation files for one genome build -->
+  <requirements> <!-- NOTE(review): the script also loads optparse, RGalaxy and rjson; confirm the environment provides them -->
+    <requirement type="package" version="1.14.0">bioconductor-customprodb</requirement>
+  </requirements>
+  <stdio> <!-- both ranges together cover every non-zero exit code -->
+    <exit_code range=":-1" />
+    <exit_code range="1:" />
+  </stdio>
+  <command><![CDATA[
+    Rscript --vanilla '$__tool_directory__/customProDB_annotation.R'
+    --outputFile '${out_file}'
+    --dbkey '${dbkey}'
+    --dbsnp '${dbsnp}'
+    $cosmic
+    --dbkey_description '${ dbkey.get_display_text() }'
+    2>&1
+]]>
+  </command> <!-- stderr is merged into stdout (2>&1) -->
+  <inputs>
+    <param type="genomebuild" name="dbkey" value="" label="UCSC dbKey for reference genome" />
+    <param type="text" name="dbsnp" value="" label="dbSNP identifier currently available from UCSC" help="e.g. 'snp142'" />
+    <param type="boolean" name="cosmic" truevalue="--cosmic true" falsevalue="" label="Annotate somatic SNPs from COSMIC (human only)" />
+  </inputs>
+  <outputs>
+    <data name="out_file" format="data_manager_json"/>
+  </outputs>
+  <help>
+
+.. class:: infomark
+
+**Notice:** The RefSeq coding and protein sequences for the selected genome build are downloaded from UCSC. COSMIC annotation is only available for human genome builds (e.g. hg19).
+
+  </help>
+  <citations>
+  <citation type="doi">10.1093/bioinformatics/btt543</citation>
+  <citation type="bibtex">@misc{toolsGalaxyP, author = {Chambers MC, et al.}, title = {Galaxy Proteomics Tools}, publisher = {GitHub}, journal = {GitHub repository},
+                                year = {2017}, url = {https://github.com/galaxyproteomics/tools-galaxyp}}</citation> <!-- TODO: fix substitution of commit ", commit = {$sha1$}" -->
+  </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml	Tue Mar 14 14:11:55 2017 -0400
@@ -0,0 +1,22 @@
+<?xml version="1.0"?>
+<data_managers>
+    <!-- Registers the customProDB annotation tool as a data manager and maps its output onto the customProDB data table. -->
+    <data_manager tool_file="data_manager/customProDB_annotation.xml" id="custom_pro_db_annotation_builder" version="0.0.1">
+        <data_table name="customProDB">
+            <output>
+                <column name="value" />
+                <column name="dbkey" />
+                <column name="name" />
+                <column name="path" output_ref="out_file" >
+                    <move type="directory" relativize_symlinks="True">
+                        <!-- No <source> (e.g. <source>${path}</source>) is given: for type="directory" the move then refers to the base, which defaults to out_file.extra_files_path. -->
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">${dbkey}/customProDB</target>
+                    </move>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/${dbkey}/customProDB</value_translation>
+                    <value_translation type="function">abspath</value_translation>
+                </column>
+            </output>
+        </data_table>
+    </data_manager>
+
+</data_managers>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/customProDB.loc.sample	Tue Mar 14 14:11:55 2017 -0400
@@ -0,0 +1,16 @@
+#This file lists the locations and dbkeys of all the customProDB annotation
+#directories (each a "customProDB" subdirectory of a genome build).
+#This file has the format (white space characters are TAB characters):
+#
+#<unique_build_id>	<dbkey>		<display_name>	<path to annotation files>
+#
+#So, customProDB.loc could look something like this:
+#
+#apiMel3	apiMel3	Honeybee (Apis mellifera): apiMel3		/path/to/genome/apiMel3
+#hg19canon	hg19		Human (Homo sapiens): hg19 Canonical		/path/to/genome/hg19
+#hg19full	hg19		Human (Homo sapiens): hg19 Full			/path/to/genome/hg19full
+#
+#Your customProDB.loc file should contain an entry for each individual
+#annotation directory. So there can be multiple entries for each build,
+#such as with hg19 above.
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Tue Mar 14 14:11:55 2017 -0400
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Data table of customProDB annotation locations; rows are read from tool-data/customProDB.loc, where '#' starts a comment line -->
+    <table name="customProDB" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/customProDB.loc" />
+    </table>
+</tables>