view mutational_patterns.xml @ 10:59c0e2a7965c draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mutational_patterns commit 533394bd219d56cde7fb9721803e8aed8fd5a25d"
author artbio
date Mon, 19 Oct 2020 23:34:58 +0000
parents 4ff3c984523b
children 7995a949189f
line wrap: on
line source

<tool id="mutational_patterns" name="Analyse Mutational Patterns/Signatures" version="2.0.0+galaxy11">
    <!-- Galaxy wrapper around mutational_patterns.R (invoked from the command section below). -->
    <description>from genomic variations in vcf files</description>
    <!-- Conda packages needed by the tool; each version is pinned together with its build string. -->
    <requirements>
        <requirement type="package" version="2.0.0=r40_0">bioconductor-mutationalpatterns</requirement>
        <requirement type="package" version="1.6.6=r40h6115d3f_1">r-optparse</requirement>
        <requirement type="package" version="0.2.20=r40h0357c0b_1002">r-rjson</requirement>
        <requirement type="package" version="0.21.0=r40h0357c0b_1004">r-nmf</requirement>
        <requirement type="package" version="2.3=r40h6115d3f_1003">r-gridextra</requirement>
        <!-- One BSgenome package per entry of the "genome" select parameter. -->
        <requirement type="package" version="1.4.3=r40_0">bioconductor-bsgenome.hsapiens.ucsc.hg38</requirement>
        <requirement type="package" version="0.99.1=r40_4">bioconductor-bsgenome.hsapiens.1000genomes.hs37d5</requirement>
        <requirement type="package" version="1.4.3=r40_0">bioconductor-bsgenome.hsapiens.ucsc.hg19</requirement>
        <requirement type="package" version="1.3.1000=r40_4">bioconductor-bsgenome.hsapiens.ncbi.grch38</requirement>
        <!-- Not installed yet: mouse genomes from bioconda. The matching "genome" options
             are commented out in the inputs section as well.
        bioconductor-bsgenome.mmusculus.ucsc.mm9
        bioconductor-bsgenome.mmusculus.ucsc.mm10   -->
    </requirements>
    <!-- Any exit code of 1 or above is reported as a fatal "Tool exception". -->
    <stdio>
        <exit_code range="1:" level="fatal" description="Tool exception" />
    </stdio>

    <!-- Cheetah template that assembles the Rscript call.
         The VCF collection is passed as one JSON object mapping each dataset path to its
         element identifier. Optional analyses add their flags only when the matching
         conditional is set to "yes". All paths and the JSON payload are shell-quoted. -->
    <command detect_errors="exit_code"><![CDATA[
#import json
#import shlex
Rscript '$__tool_directory__/mutational_patterns.R'
    --inputs
    ## Build {dataset path: element identifier}; silent so the call result is never rendered.
    #set $filename_to_element_identifiers = {}
    #for $sample in $vcfs:
        #silent $filename_to_element_identifiers.__setitem__(str($sample), $sample.element_identifier)
    #end for
    ## Element identifiers are user-controlled: shlex.quote keeps a quote character in a
    ## sample name from terminating the argument (broken command line or shell injection).
    #echo shlex.quote(json.dumps(filename_to_element_identifiers))#
    --genome '$genome'

    #if $set_levels.choices == 'yes':
        --levels '$set_levels.levels'
    #end if

    #if $set_spectrum.choices == 'yes':
        --output_spectrum '$spectrum'
    #end if

    #if $set_denovo.choices == 'yes':
        --nrun $set_denovo.nrun
        --rank $set_denovo.rank
        --newsignum $set_denovo.newsignum
        --output_denovo '$denovo'
        --sigmatrix '$sigmatrix'
    #end if

    #if $set_cosmic.choices == 'yes':
        --cosmic_version $set_cosmic.cosmic_version
        --signum '$set_cosmic.signum'
        --output_cosmic '$cosmic'
    #end if

    #if $rdata_out:
        --rdata '$rdata'
    #end if

]]></command>
    <!-- User-facing parameters. Each optional analysis is wrapped in a yes/no conditional;
         the command template and the output filters test the "choices" selector of each. -->
    <inputs>
        <!-- NOTE(review): "multiple" does not look like a documented attribute for
             data_collection parameters; confirm it is ignored and consider declaring
             collection_type="list" instead, since the command iterates over elements. -->
        <param name="vcfs" type="data_collection" format="vcf" label="VCF file(s) collection" multiple="true"/>
        <param name="genome" type="select" label="Reference Genome">
            <option value="BSgenome.Hsapiens.1000genomes.hs37d5">BSgenome.Hsapiens.1000genomes.hs37d5</option>
            <option value="BSgenome.Hsapiens.NCBI.GRCh38">BSgenome.Hsapiens.NCBI.GRCh38</option>
            <option value="BSgenome.Hsapiens.UCSC.hg19">BSgenome.Hsapiens.UCSC.hg19</option>
            <option value="BSgenome.Hsapiens.UCSC.hg38" selected="true">BSgenome.Hsapiens.UCSC.hg38</option>
            <!-- Mouse genomes are disabled until their packages are added to the requirements.
            <option value="BSgenome.Mmusculus.UCSC.mm10">BSgenome.Mmusculus.UCSC.mm10</option>
            <option value="BSgenome.Mmusculus.UCSC.mm9">BSgenome.Mmusculus.UCSC.mm9</option>-->
        </param>

        <!-- Optional grouping of samples by a user-supplied levels table. -->
        <conditional name="set_levels">
            <param name="choices" type="select" label="samples have levels/labels for grouping them in the analysis" display="radio"
                   help="for instance, female/male or genotype-1/genotype-2/genotype-3, etc.">
                <option value="no" selected="true">No</option>
                <option value="yes">Yes</option>
            </param>
            <when value="yes">
                <param name="levels" type="data" format="tabular"
                       label="A two-column tab-separated file describing levels attributed to each sample name"
                       help="Tip: the sample name list in the vcf collection can be obtained using
                       the IUC Galaxy tool 'Extract element identifiers of a list collection' &lt;br&gt;
                       example:&lt;br&gt;
                       sample1 female&lt;br&gt;
                       sample2 female&lt;br&gt;
                       sample3 male&lt;br&gt;
                       sample4 female&lt;br&gt;
                       sample5 male&lt;br&gt;
                       sample6 male" />
            </when>
            <when value="no" />
        </conditional>

        <!-- Mutational spectrum plot; no extra parameters in either branch. -->
        <conditional name="set_spectrum">
            <param name="choices" type="select" label="Analyse Mutational Spectrum" display="radio">
                <option value="yes" selected="true">Yes</option>
                <option value="no">No</option>
            </param>
            <when value="yes" />
            <when value="no" />
        </conditional>

        <!-- De novo signature extraction and its three tuning integers. -->
        <conditional name="set_denovo">
            <param name="choices" type="select" label="Extract de novo signatures with MutationalPatterns" display="radio">
                <option value="yes" selected="true">Yes</option>
                <option value="no">No</option>
            </param>
            <when value="yes">
                <param name="nrun" type="integer" value="10" min="10" max="200"
                       label="Number of cycles to find best fitting of signatures"
                       help="High values extend the computational time"/>
                <param name="rank" type="integer" value="4" min="3" max="30"
                       label="Number of ranks to be displayed for control of optimal number of signature"
                       help="High values extend the computational time. In addition the number of ranks cannot be greater than the number of available samples in the study"/>
                <param name="newsignum" type="integer" value="4" min="2" max="30"
                       label="Number of de novo signatures to capture"
                       help="High values extend the computational time. Note also that you cannot extract more signature than the number of available samples in the study"/>
            </when>
            <when value="no" />
        </conditional>

        <!-- Decomposition over Cosmic signatures; the signature set version is chosen below,
             so the selector label does not name a specific version. -->
        <conditional name="set_cosmic">
            <param name="choices" type="select" label="Decompose with Cosmic signatures" display="radio">
                <option value="yes" selected="true">Yes</option>
                <option value="no">No</option>
            </param>
            <when value="yes">
                <param name="signum" type="integer" value="3" min="2" max="30"
                       label="selects the N most significant signatures in samples to express mutational patterns"
                       help="an integer between 2 and 30 signature types from cosmic"/>
                <param name="cosmic_version" type="select" label="Version of the Cosmic signature set">
                    <option value="v2" selected="true">Cosmic v2, March 2015</option>
                    <option value="v3">Cosmic v3, May 2019</option>
                </param>
            </when>
            <when value="no" />
        </conditional>
        <param name="rdata_out" type="boolean" checked="false" label="Output RData file?" help="Output all the data used by R to construct the tables and plots, can be loaded into R" />
    </inputs>
    <!-- Output datasets. Each one is only created when its filter expression is true,
         i.e. when the corresponding analysis (or the RData checkbox) was enabled. -->
    <outputs>
        <!-- Created when set_spectrum is "yes". -->
        <data name="spectrum" format="pdf" label="Mutational Spectrum">
            <filter>set_spectrum['choices'] == "yes"</filter>
        </data>
        <!-- The next two are both created when set_denovo is "yes". -->
        <data name="denovo" format="pdf" label="De novo signatures">
            <filter>set_denovo['choices'] == "yes"</filter>
        </data>
        <data name="sigmatrix" format="tabular" label="De novo signatures probability matrix">
            <filter>set_denovo['choices'] == "yes"</filter>
        </data>
        <!-- Created when set_cosmic is "yes". -->
        <data name="cosmic" format="pdf" label="Cosmic signatures">
            <filter>set_cosmic['choices'] == "yes"</filter>
        </data>
        <!-- Created when the rdata_out boolean is checked. -->
        <data name="rdata" format="rdata" label="${tool.name}: RData file">
            <filter>rdata_out</filter>
        </data>

    </outputs>
    <!-- Functional tests. Parameters that live inside a conditional are declared inside the
         matching conditional element so each test mirrors the structure of the inputs.
         All outputs are compared by size (sim_size) rather than by content. -->
    <tests>
        <!-- simple profile: spectrum only, with sample levels -->
        <test>
            <param name="vcfs">
                <collection type="list">
                    <element name="1" ftype="vcf" value="G.vcf"/>
                    <element name="2" ftype="vcf" value="H.vcf"/>
                </collection>
            </param>
            <param name="genome" value="BSgenome.Hsapiens.UCSC.hg38"/>
            <conditional name="set_levels">
                <param name="choices" value="yes"/>
                <param name="levels" value="GH_levels.tab" ftype="tabular"/>
            </conditional>
            <conditional name="set_spectrum">
                <param name="choices" value="yes"/>
            </conditional>
            <conditional name="set_denovo">
                <param name="choices" value="no"/>
            </conditional>
            <conditional name="set_cosmic">
                <param name="choices" value="no"/>
            </conditional>
            <output name="spectrum" file="spectrum_output1.pdf" compare="sim_size" ftype="pdf"/>
        </test>

        <!-- de novo signatures, also exercising the RData output -->
        <test>
            <param name="vcfs">
                <collection type="list">
                    <element name="6" ftype="vcf" value="F.vcf"/>
                    <element name="7" ftype="vcf" value="G.vcf"/>
                    <element name="8" ftype="vcf" value="H.vcf"/>
                    <element name="9" ftype="vcf" value="I.vcf"/>
                </collection>
            </param>
            <param name="genome" value="BSgenome.Hsapiens.UCSC.hg38"/>
            <conditional name="set_spectrum">
                <param name="choices" value="no"/>
            </conditional>
            <conditional name="set_denovo">
                <param name="choices" value="yes"/>
                <param name="nrun" value="10" />
                <param name="rank" value="4" />
                <param name="newsignum" value="4" />
            </conditional>
            <conditional name="set_cosmic">
                <param name="choices" value="no"/>
            </conditional>
            <param name="rdata_out" value="true" />
            <output name="denovo" file="denovo_output1.pdf" compare="sim_size" ftype="pdf"/>
            <output name="sigmatrix" file="sigmatrix.tab" ftype="tabular" compare="sim_size"/>
            <output name="rdata" file="denovo_1.RData" compare="sim_size" delta="400000"/> <!--  delta="170000" -->
        </test>

        <!-- cosmic signatures -->
        <test>
            <param name="vcfs">
                <collection type="list">
                    <element name="6" ftype="vcf" value="F.vcf"/>
                    <element name="7" ftype="vcf" value="G.vcf"/>
                    <element name="8" ftype="vcf" value="H.vcf"/>
                    <element name="9" ftype="vcf" value="I.vcf"/>
                </collection>
            </param>
            <param name="genome" value="BSgenome.Hsapiens.UCSC.hg38"/>
            <!-- NOTE(review): set_levels.choices is not set in this test, so it keeps its
                 default "no" and this levels file appears to be unused. Confirm whether the
                 test was meant to enable grouping; doing so may change the expected PDF size. -->
            <param name="levels" value="FGHI_levels.tab" ftype="tabular"/>
            <conditional name="set_spectrum">
                <param name="choices" value="no"/>
            </conditional>
            <conditional name="set_denovo">
                <param name="choices" value="no"/>
            </conditional>
            <conditional name="set_cosmic">
                <param name="choices" value="yes"/>
                <param name="signum" value="3" />
            </conditional>
            <output name="cosmic" file="cosmic_output1.pdf" compare="sim_size" ftype="pdf"/>
        </test>

        <!-- cosmic signature on single sample -->
        <test>
            <param name="vcfs">
                <collection type="list">
                    <element name="1" ftype="vcf" value="G.vcf"/>
                </collection>
            </param>
            <param name="genome" value="BSgenome.Hsapiens.UCSC.hg38"/>
            <conditional name="set_spectrum">
                <param name="choices" value="no"/>
            </conditional>
            <conditional name="set_denovo">
                <param name="choices" value="no"/>
            </conditional>
            <conditional name="set_cosmic">
                <param name="choices" value="yes"/>
                <param name="signum" value="5" />
            </conditional>
            <output name="cosmic" file="cosmic_output2.pdf" compare="sim_size" delta="50000" ftype="pdf"/>
        </test>
    </tests>
    <help>

**What it does**

Takes as inputs

* a collection of n vcf files corresponding to n samples.
* a tabular file describing the correspondence of sample names to levels (tissues, ages, sexes, etc.)
* the number of cosmic signatures to decompose mutational patterns of samples


This tool returns a PDF file with the following visualisations:

* the Cosine similarity of samples when decomposed over the 30 signatures of cosmic_
* the absolute contribution of the n most contributing cosmic_ signatures in the samples mutational patterns (to be set by the user, between 2 and 30)
* the relative contribution of the n most contributing cosmic_ signatures in the samples mutational patterns  (to be set by the user, between 2 and 30)
* a clustering of the samples with respect to the relative contribution of their cosmic_ signatures
* pie charts of the samples displaying for each sample the relative contribution of the n most contributing cosmic_ signatures in their mutational pattern

.. _cosmic: https://cancer.sanger.ac.uk/cosmic/signatures_v2.tt

    </help>
    <!-- References for this tool, each given as a DOI. The first is the Bioconductor
         MutationalPatterns package; the other two are presumably the associated
         publications (verify before relying on this note). -->
    <citations>
        <citation type="doi">10.18129/B9.bioc.MutationalPatterns</citation>
        <citation type="doi">10.1186/s13073-018-0539-0</citation>
        <citation type="doi">10.1038/nature12477</citation>
    </citations>
</tool>