<!--
view mutspecFilter.xml @ 1:748b7a8b634c draft

Uploaded
author iarc
date Thu, 21 Apr 2016 09:36:32 -0400
parents 8c682b3a7c5b
children 9d363eb081b5
line wrap: on
line source
-->

<tool id="MutSpecfilter" name="MutSpec Filter" version="0.1" hidden="false">
<description>Filter out variants present in public databases</description>

<!-- Runtime dependencies: the SCRIPT_PATH environment setting (passed to the
     script through its dir option) and a packaged Perl interpreter. -->
<requirements>
  <requirement type="set_environment">SCRIPT_PATH</requirement>
  <requirement type="package" version="5.18.1">perl</requirement>
</requirements>

<!-- Runs mutspecFilter.pl under Perl. The segDup/esp/thG booleans expand to
     their option string when checked and to nothing otherwise. The dbSNP option
     receives the column chosen in the FilterdbSNP conditional, or 0 when dbSNP
     filtering is switched off. -->
<command interpreter="perl">
    mutspecFilter.pl
    --dir "\$SCRIPT_PATH"
    $segDup
    $esp
    $thG
    ## Compare the rendered string value of the boolean: comparing the parameter
    ## wrapper against the Python literal True is not reliable across Galaxy releases.
    #if str($FilterdbSNP.dbSNP) == "true"
        --dbSNP ${FilterdbSNP.column}
    #else
        --dbSNP 0
    #end if
    --refGenome '${refGenome}'
    --outfile '$output'
    '$input'
</command>

<inputs>
    <!-- Annotated variant file to filter. -->
    <param name="input" type="data" format="txt" label="Input file"/>

    <!-- Reference genome, chosen from the annovar_index data table. -->
    <param name="refGenome" type="select" label="Reference genome" help="All your data should have been annotated with the selected genome">
        <options from_data_table="annovar_index" />
    </param>

    <!-- dbSNP filtering: the column selector is only shown when filtering is enabled. -->
    <conditional name="FilterdbSNP">
        <param name="dbSNP" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Filter against dbSNP database" help="Remove variants with a RS number" />
        <when value="true">
            <param name="column" type="data_column" data_ref="input" label="Select the dbSNP column for filtering" use_header_names="true" help="Select a column name snp or snpNonFlagged" />
        </when>
        <!-- Every value of the test parameter needs a branch; nothing extra is asked when filtering is off. -->
        <when value="false" />
    </conditional>

    <!-- Each flag below renders as its command-line option when checked and as an empty string otherwise. -->
    <param name="segDup" type="boolean" checked="true" truevalue="--segDup" falsevalue="" label="Filter against SegDup database" help="Remove variants present at &gt;= 0.9 frequency in the genomic duplicate segments database" />
    <param name="esp" type="boolean" checked="true" truevalue="--esp" falsevalue="" label="Filter against the ESP database" help="Remove variants present at frequency &gt; 0.001 in the Exome Sequencing Project database (only valid for human genomes)" />
    <param name="thG" type="boolean" checked="true" truevalue="--thG" falsevalue="" label="Filter against the 1000g database project" help="Remove variants present at frequency &gt; 0.001 in the 1000 genome database (only valid for human genomes)" />
</inputs>

<outputs>
    <!-- Filtered table written to the path given by the outfile option. The label
         reuses the part of the input dataset name before its first space. -->
    <data name="output" format="tabular" label="${input.name.split(' ')[0]} filtered" />
</outputs>

<help>

**What it does**

Filters a file annotated with the MutSpec-Annot tool. Variants present in public databases (dbSNP, SegDup, ESP and 1000 Genomes, as obtained from Annovar) are removed from the input file, using the frequency limits given in the parameter descriptions above.

.. class:: warningmark

The ESP and 1000 Genomes databases can only be used for human genomes.

--------------------------------------------------------------------------------------------------------------------------------------------------

**Input**

.. class:: warningmark

Tab-delimited text file generated by the MutSpec-Annot tool.

--------------------------------------------------------------------------------------------------------------------------------------------------

**Output**

Tab-delimited text file from which variants considered to be neutral polymorphisms have been removed.

--------------------------------------------------------------------------------------------------------------------------------------------------

**Example**

Filter the following file::

     Chr    Start      End        Ref  Alt  Func.refGene  Gene.refGene  ExonicFunc.refGene    AAChange.refGene                           genomicSuperDups                   snp138       1000g2014oct_all  esp6500si_all  Strand  context  Chromosome  Start_Position  End_Position  Reference_Allele  Tumor_Seq_Allele2
     chr7   121717919  121717920  -    G    exonic        AASS          frameshift insertion  AASS:NM_005763:exon23:c.2634dupC:p.A879fs  NA                                 rs147476318  NA                NA             -       GCG      chr7        121717919       121717920     -                 G
     chr1   230846235  230846235  T    A    exonic        AGT           nonsynonymous SNV     AGT:NM_000029:exon2:c.A362T:p.H121L        NA                                 NA           NA                NA             -       GTG      chr1        230846235       230846235     T                 A
     chr14  33290999   33290999   A    G    exonic        AKAP6         nonsynonymous SNV     AKAP6:NM_004274:exon13:c.A3980G:p.D1327G   NA                                 NA           NA                NA             +       GAC      chr14       33290999        33290999      A                 G
     chr12  8082458    8082458    C    T    exonic        SLC2A3        nonsynonymous SNV     SLC2A3:NM_006931:exon6:c.G683A:p.R228Q     NA                                 rs200481428  0.000199681       NA             -       CCG      chr12       8082458         8082458       C                 T
     chr4   70156391   70156391   T    C    exonic        UGT2B28       nonsynonymous SNV     UGT2B28:NM_053039:exon5:c.T1172C:p.V391A   score=0.949699;Name=chr4:70035680  NA           0.000199681       NA             +       GTA      chr4        70156391        70156391      T                 C

Will produce::

     Chr    Start      End        Ref  Alt  Func.refGene  Gene.refGene  ExonicFunc.refGene    AAChange.refGene                           genomicSuperDups                   snp138       1000g2014oct_all  esp6500si_all  Strand  context  Chromosome  Start_Position  End_Position  Reference_Allele  Tumor_Seq_Allele2
     chr1   230846235  230846235  T    A    exonic        AGT           nonsynonymous SNV     AGT:NM_000029:exon2:c.A362T:p.H121L        NA                                 NA           NA                NA             -       GTG      chr1        230846235       230846235     T                 A
     chr14  33290999   33290999   A    G    exonic        AKAP6         nonsynonymous SNV     AKAP6:NM_004274:exon13:c.A3980G:p.D1327G   NA                                 NA           NA                NA             +       GAC      chr14       33290999        33290999      A                 G
     chr4   70156391   70156391   T    C    exonic        UGT2B28       nonsynonymous SNV     UGT2B28:NM_053039:exon5:c.T1172C:p.V391A   score=0.949699;Name=chr4:70035680  NA           0.000199681       NA             +       GTA      chr4        70156391        70156391      T                 C



</help>

<!-- Reference for the MutSpec toolbox. The journal, year and doi fields are
     taken from the article URL and citation key of this entry. -->
<citations>
    <citation type="bibtex">
        @ARTICLE{ardin_mutspec:_2016,
            author = {Ardin et al},
            keywords = {Galaxy, Mutation signatures, Mutation spectra, Single base substitutions},
            title = {{MutSpec}: a Galaxy toolbox for streamlined analyses of somatic mutation spectra in human and mouse cancer genomes},
            journal = {BMC Bioinformatics},
            year = {2016},
            doi = {10.1186/s12859-016-1011-z},
            url = {http://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-016-1011-z}
        }
    </citation>
</citations>

</tool>