view data_manager/customProDB_annotation.xml @ 3:9ee512decde8 draft default tip

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/bumbershoot/custom_pro_db commit d4b5497065b853ed094aebc9e4185e9995c5e0e0
author galaxyp
date Wed, 01 Nov 2017 19:34:06 -0400
parents 9b4ee836e35b
children
line wrap: on
line source

<tool id="custom_pro_db_annotation_data_manager" name="CustomProDB Annotation" tool_type="manage_data" version="1.16.1.0">
  <description>builder</description>
  <!-- Pinned packages that make up the environment the R script runs in. -->
  <requirements>
    <!-- R runtime -->
    <requirement type="package" version="3.3.1">r-base</requirement>
    <!-- NOTE(review): the customProDB package itself is disabled below; presumably it is
         obtained some other way (devtools and biocinstaller are both required). Confirm. -->
    <!--<requirement type="package" version="1.14.0">bioconductor-customprodb</requirement>-->
    <!-- Bioconductor packages -->
    <requirement type="package" version="1.18.0">bioconductor-rgalaxy</requirement>
    <requirement type="package" version="1.21.0">bioconductor-biocinstaller</requirement>
    <requirement type="package" version="1.20.3">bioconductor-variantannotation</requirement>
    <requirement type="package" version="1.26.4">bioconductor-genomicfeatures</requirement>
    <!-- Other R packages -->
    <requirement type="package" version="1.11.1">r-devtools</requirement>
    <requirement type="package" version="3.98_1.4">r-xml</requirement>
    <requirement type="package" version="0.10.11">r-rmysql</requirement>
    <requirement type="package" version="1.0.2">r-testthat</requirement>
    <requirement type="package" version="0.1.0">r-getoptlong</requirement>
    <requirement type="package" version="1.1.2">r-stringi</requirement>
    <requirement type="package" version="1.1.0">r-stringr</requirement>
    <requirement type="package" version="1.10.0">r-data.table</requirement>
    <requirement type="package" version="0.4_10">r-sqldf</requirement>
    <requirement type="package" version="0.6_6">r-gsubfn</requirement>
    <requirement type="package" version="2.3_47">r-chron</requirement>
    <requirement type="package" version="0.3_10">r-proto</requirement>
    <requirement type="package" version="1.8.4">r-plyr</requirement>
  </requirements>
  <!-- Any non-zero exit status, negative or positive, marks the job as failed. -->
  <stdio>
    <exit_code range=":-1" level="fatal" />
    <exit_code range="1:" level="fatal" />
  </stdio>
  <!--
    Cheetah template for the command line. It runs customProDB_annotation.R from the
    tool directory with Rscript (vanilla mode) and passes the out_file path as the
    outputFile option.
    When the selected source is "refseq" it passes dbkey plus dbkey_description, the
    latter being the genome build's display text with leading and trailing quote
    characters stripped. Otherwise it passes ensembl_dataset, ensembl_host and
    dbkey_description taken from the fields of the selected ensembl_dataset option.
    The dbsnp option is always passed, even when the text field is empty, and $cosmic
    expands to the boolean parameter's truevalue or falsevalue.
    stderr is redirected to stdout.
  -->
  <command><![CDATA[
    Rscript --vanilla '$__tool_directory__/customProDB_annotation.R'
        --outputFile '${out_file}'

        #if str($transcriptome_annotation.source) == 'refseq':
          --dbkey '${transcriptome_annotation.dbkey}'
          --dbkey_description '${ transcriptome_annotation.dbkey.get_display_text().strip("\"'") }'
        #else:
          --ensembl_dataset '${transcriptome_annotation.ensembl_dataset.fields.dataset}'
          --ensembl_host '${transcriptome_annotation.ensembl_dataset.fields.host}'
          --dbkey_description '${transcriptome_annotation.ensembl_dataset.fields.name}'
        #end if

        --dbsnp '${dbsnp}'
        $cosmic
    2>&1
]]>
  </command>
  <inputs>
    <!-- Selects the annotation source; the command template branches on "source". -->
    <conditional name="transcriptome_annotation">
      <param name="source" type="select" label="What source do you want to use for mapping between genes, transcripts, and proteins?" help="RefSeq transcripts are like NM_xxxx, Ensembl transcripts are like ENSTxxxx">
        <option value="refseq">Annotate transcriptome with RefSeq (from NCBI/UCSC)</option>
        <option value="ensembl">Annotate transcriptome with Ensembl (from Biomart)</option>
      </param>
      <when value="refseq">
        <!-- Genome build picker. The help examples are UCSC database names; the dog
             assembly CanFam3.1 is named "canFam3" there. -->
        <param type="genomebuild" name="dbkey" value="" label="UCSC dbKey for reference genome" help="e.g. hg38, hg19, mm10, canFam3" />
      </when>
      <when value="ensembl">
        <!-- Options are read from ensembl_datasets.loc. Column 2 is both the submitted
             value and the displayed name; columns 0 and 1 are exposed to the command
             template as fields.dataset and fields.host. -->
        <param type="select" name="ensembl_dataset" value="" label="Ensembl reference genome identifier">
          <options from_file="ensembl_datasets.loc">
            <!-- "name" is declared explicitly because the command template reads fields.name. -->
            <column name="name" index="2" />
            <column name="value" index="2" />
            <column name="dataset" index="0" />
            <column name="host" index="1" />
            <!-- Rejects the form when the .loc file yielded no options. -->
            <validator type="no_options" message="Ensembl dataset list not loaded"/>
          </options>
        </param>
      </when>
    </conditional>
    <!-- Free-text dbSNP identifier, handed to the command as the dbsnp option. -->
    <param type="text" name="dbsnp" value="" label="dbSNP identifier (select organisms only, e.g. human, mouse, cow)" help="e.g. snp142" />
    <!-- The truevalue is spliced verbatim into the command line; the falsevalue adds nothing. -->
    <param type="boolean" name="cosmic" truevalue="--cosmic true" falsevalue="" label="Annotate somatic SNPs from COSMIC (human only)" />
  </inputs>
  <!-- Single output: the data manager JSON file whose path is handed to the R script
       as the outputFile option. -->
  <outputs>
    <data name="out_file" format="data_manager_json" />
  </outputs>
  <help>

.. class:: infomark

This data manager creates the transcriptome annotation in the RData format needed by customProDB.
Two annotation sources are supported: UCSC and Ensembl.
Note that UCSC's table browser only provides the current gene annotations for a given genome assembly,
so only the Ensembl annotation is entirely reproducible, i.e. running again with the same settings next month will create the same annotation.

Ensembl chromosome names (1, 2, ...) are converted to UCSC format (chr1, chr2, ...) to ease integration with other Galaxy tools.
  </help>
  <citations>
    <!-- A DOI citation followed by a BibTeX entry for the tools-galaxyp repository. -->
    <citation type="doi">10.1093/bioinformatics/btt543</citation>
    <citation type="bibtex">@misc{toolsGalaxyP, author = {Chambers MC, et al.}, title = {Galaxy Proteomics Tools}, publisher = {GitHub}, journal = {GitHub repository},
                                year = {2017}, url = {https://github.com/galaxyproteomics/tools-galaxyp}}</citation> <!-- TODO: fix substitution of commit ", commit = {$sha1$}" -->
  </citations>
</tool>