view kggseq_variant_selection.xml @ 0:d388273fb83f draft

Uploaded
author crs4
date Fri, 12 Sep 2014 21:40:16 -0400
parents
children e9758eee6697
line wrap: on
line source

<tool id="kggseq_variant_selection" name="Variant selection with KGGSeq" version="1.1">
  <description></description>
  <!-- External dependency: the kggseq package, pinned to the version below.
       The command template reads KGGSEQ_JAR_PATH from the job environment
       (presumably exported by this package; confirm against its install
       recipe). -->
  <requirements>
    <requirement
        type="package"
        version="0.4_20140910">kggseq</requirement>
  </requirements>
  <!-- Cheetah template rendered into the KGGSeq command line. Each optional
       section below is switched on by the matching conditional in the inputs
       section; standard output of the run is redirected into the logFile
       dataset. -->
  <command>
## Dataset paths are single-quoted and the jar/resource paths double-quoted so
## that the shell passes each of them to java as exactly one argument.
java -jar "\$KGGSEQ_JAR_PATH/kggseq.jar"
## Environmental settings
--buildver hg19
--resource "\$KGGSEQ_JAR_PATH/resources"
--no-lib-check
--no-resource-check
--no-progress-check
## "results" is the output prefix; the outputs section collects results.* files
--out results
--o-vcf

## Input datasets and gene database(s)
--vcf-file '$inputFile'
--ped-file '$pedFile'
--db-gene $db_gene
$composite_subject_id

## Variant filters
$pass_variant_only
#if str($variant_filters.variant_filters_select) == "yes"
  --seq-qual $variant_filters.seq_qual
  --seq-mq $variant_filters.seq_mq
  --seq-sb $variant_filters.seq_sb
  --seq-fs $variant_filters.seq_fs
  --min-heta $variant_filters.min_heta
  --min-homa $variant_filters.min_homa
  --min-hetu $variant_filters.min_hetu
  --min-homu $variant_filters.min_homu
  --min-obsa $variant_filters.min_obsa
  --min-obsu $variant_filters.min_obsu
  --min-obs $variant_filters.min_obs
  ## The three HWE thresholds are optional: emit each flag only when a value was typed
  #if str($variant_filters.hwe_control)
    --hwe-control $variant_filters.hwe_control
  #end if
  #if str($variant_filters.hwe_case)
    --hwe-case $variant_filters.hwe_case
  #end if
  #if str($variant_filters.hwe_all)
    --hwe-all $variant_filters.hwe_all
  #end if
#else
  ## Section not enabled: pass explicit zero thresholds
  ## (presumably to override KGGSeq built-in defaults; confirm against the KGGSeq manual)
  --seq-qual 0
  --seq-mq 0
#end if

## Genotype filters
#if str($genotype_filters.genotype_filters_select) == "yes"
  --gty-qual $genotype_filters.gty_qual
  --gty-dp $genotype_filters.gty_dp
  --gty-sec-pl $genotype_filters.gty_sec_pl
  --gty-af-ref $genotype_filters.gty_af_ref
  --gty-af-het $genotype_filters.gty_af_het
  --gty-af-alt $genotype_filters.gty_af_alt
#else
  ## Section not enabled: pass fixed values for every genotype threshold
  --gty-qual 0
  --gty-dp 0
  --gty-sec-pl 0
  --gty-af-ref 1
  --gty-af-het 0
  --gty-af-alt 0
#end if

## Genetic inheritance
#if str($genetic_filters.genetic_filters_select) == "yes"
  #if str($genetic_filters.genetic_model.genetic_model_select) == "yes"
    ## Advanced mode: comma-separated filtration codes picked by the user (may be empty)
    #if $genetic_filters.genetic_model.custom_genetic_params
      --genotype-filter $genetic_filters.genetic_model.custom_genetic_params
    #end if
  #else
    ## Suggested mode: the option value already is the complete flag string
    $genetic_filters.genetic_model.suggested_genetic_params
  #end if
#end if

## Gene feature filters
## NOTE(review): when no gene feature is ticked, --splicing and --neargene are
## skipped as well; confirm this is intended.
#if str($gene_feature_filters.gene_feature_filters_select) == "yes" and $gene_feature_filters.gene_features
  --gene-feature-in $gene_feature_filters.gene_features
  --splicing $gene_feature_filters.splicing
  --neargene $gene_feature_filters.neargene
#end if

## Common variants filters
#if str($allele_freq_filters.allele_freq_filters_select) == "yes"
  #if $allele_freq_filters.allele_freq_db
    --db-filter $allele_freq_filters.allele_freq_db
  #end if
  --rare-allele-freq $allele_freq_filters.rare_allele_freq
#end if

## Genomic regions filters
#if str($genomic_region_filters.genomic_region_filters_select) == "yes"
  $genomic_region_filters.ignore_indel_or_snv
  ## Free-text lists: each flag is emitted only when its text box is non-empty
  #if $genomic_region_filters.regions_in
    --regions-in "$genomic_region_filters.regions_in"
  #end if
  #if $genomic_region_filters.regions_out
    --regions-out "$genomic_region_filters.regions_out"
  #end if
  #if $genomic_region_filters.genes_in
    --genes-in "$genomic_region_filters.genes_in"
  #end if
  #if $genomic_region_filters.genes_out
    --genes-out "$genomic_region_filters.genes_out"
  #end if
  $genomic_region_filters.superdup
  #if str($genomic_region_filters.gene_var_filter)
    --gene-var-filter $genomic_region_filters.gene_var_filter
  #end if
#end if

## Predicted impact filters
#if str($impact_filters.impact_filters_select) == "yes"
  --db-score dbnsfp
  $impact_filters.filter_nondisease_variant
  $impact_filters.mendel_causing
#end if

## Add annotations
#if str($add_annotations.add_annotations_select) == "yes"
  $add_annotations.genome_annotation
  $add_annotations.omim_annotation
  $add_annotations.cosmic_annotation
  ## pubmed_type holds the flag name; the search runs only when terms were entered
  #if $add_annotations.pubmed_mining_gene
    $add_annotations.pubmed_type "$add_annotations.pubmed_mining_gene"
  #end if
  #if str($add_annotations.shared_genes.shared_genes_select) == "yes"
    --ppi-annot string
    --candi-file '$add_annotations.shared_genes.candi_file'
    --ppi-depth $add_annotations.shared_genes.ppi_depth
    --pathway-annot $add_annotations.shared_genes.pathway_annot
  #end if
#end if
## Standard output becomes the log dataset
&gt; '$logFile'
  </command>
  <inputs>
    <!-- Inputs and global switches that the command template references
         unconditionally: the VCF and pedigree datasets, the gene database
         selection, and two boolean flags that render as an empty string when
         unchecked. -->
    <param name="inputFile" type="data" format="vcf" label="VCF Variant file (--vcf-file)" help="Coordinates must refer to hg19" />
    <param name="pedFile" type="data" format="tabular" label="Pedigree (--ped-file)" />
    <!-- Multiple selection; the validator message asks for at least one database. -->
    <param name="db_gene" type="select" display="checkboxes" multiple="true" label="Database(s) to annotate and filter variants (--db-gene)">
      <option value="refgene" selected="true">refgene: The RefGene database compiled by UCSC from hg19 refGene. Note: RefSeq has NO mitochondria gene definition</option>
      <option value="gencode">gencode: The GENCODE gene sets. Note: GENCODE contains similar number of coding genes but more transcripts than RefGene. It HAS the mitochondria gene definition</option>
      <option value="knowngene">knowngene: The UCSC knownGene database compiled by UCSC from hg19 knownGene</option>
      <validator type="no_options" message="Select at least one database" />
    </param>
    <param name="composite_subject_id" type="boolean" truevalue="--composite-subject-id" falsevalue="" checked="false" label="Composite subject ID (--composite-subject-id)" />
    <param name="pass_variant_only" type="boolean" truevalue="--vcf-filter-in PASS" falsevalue="" checked="true" label="Don't include filtered loci (--vcf-filter-in PASS)" help="Keep only variants with FILTER equal to PASS" />

    <!-- Variant quality control (optional section).
         Choosing "yes" exposes the per-variant thresholds below; with "no"
         the command template passes fixed seq-qual 0 and seq-mq 0 arguments
         instead. -->
    <conditional name="variant_filters">
      <param name="variant_filters_select" type="select"
             label="Specify variant quality filters?">
        <option value="yes">Yes</option>
        <option value="no" selected="true">No</option>
      </param>
      <when value="yes">
        <!-- Sequencing quality, mapping quality and strand bias thresholds -->
        <param name="seq_qual" type="integer" value="50"
               label="Minimum overall Phred-scaled sequencing quality score for the variant (--seq-qual)" />
        <param name="seq_mq" type="integer" value="20"
               label="Minimum overall Phred-scaled mapping quality score for the variant (--seq-mq)" />
        <param name="seq_sb" type="float" value="-10"
               label="Maximal overall strand bias score for the variant (--seq-sb)" />
        <param name="seq_fs" type="integer" value="60"
               label="Maximal overall strand bias Phred-scaled p-value (using Fisher's exact test) for the variant (--seq-fs)" />
        <!-- Minimal genotype counts in cases (a suffix) and controls (u suffix) -->
        <param name="min_heta" type="integer" value="1"
               label="Minimal observed number of heterozygote genotypes in cases (the affected) (--min-heta)" />
        <param name="min_homa" type="integer" value="1"
               label="Minimal observed number of alternate homozygote genotypes in cases (the affected) (--min-homa)" />
        <param name="min_hetu" type="integer" value="1"
               label="Minimal observed number of heterozygote genotypes in controls (the unaffected) (--min-hetu)" />
        <param name="min_homu" type="integer" value="1"
               label="Minimal observed number of alternate homozygote genotypes in controls (the unaffected) (--min-homu)" />
        <param name="min_obsa" type="integer" value="1"
               label="Minimal observed number of non-missing genotypes in cases (the affected) (--min-obsa)" />
        <param name="min_obsu" type="integer" value="1"
               label="Minimal observed number of non-missing genotypes in controls (the unaffected) (--min-obsu)" />
        <param name="min_obs" type="integer" value="2"
               label="Minimal observed number of non-missing genotypes in all samples (--min-obs)" />
        <!-- Optional Hardy-Weinberg thresholds: left empty, the command
             template omits the corresponding flag -->
        <param name="hwe_control" type="float" value="" optional="true"
               label="Exclude variants in controls with the Hardy-Weinberg test p-value &lt;= this value (--hwe-control)" />
        <param name="hwe_case" type="float" value="" optional="true"
               label="Exclude variants in cases with the Hardy-Weinberg test p-value &lt;= this value (--hwe-case)" />
        <param name="hwe_all" type="float" value="" optional="true"
               label="Exclude variants in all subjects with the Hardy-Weinberg test p-value &lt;= this value (--hwe-all)" />
      </when>
      <when value="no" />
    </conditional>

    <!-- Genotype quality control (optional section).
         Choosing "yes" exposes the per-genotype thresholds below; with "no"
         the command template passes fixed values for all six options. -->
    <conditional name="genotype_filters">
      <param name="genotype_filters_select" type="select"
             label="Specify genotype quality filters?">
        <option value="yes">Yes</option>
        <option value="no" selected="true">No</option>
      </param>
      <when value="yes">
        <!-- Quality and depth thresholds -->
        <param name="gty_qual" type="integer" value="10"
               label="Minimum Phred-scaled genotyping quality (--gty-qual)" />
        <param name="gty_dp" type="integer" value="4"
               label="Minimal read depth per genotype (--gty-dp)" />
        <param name="gty_sec_pl" type="integer" value="20"
               label="Minimal value for second smallest normalized Phred-scaled genotype quality (--gty-sec-pl)" />
        <!-- Alternative-allele read fractions per genotype class -->
        <param name="gty_af_ref" type="float" value="0.05"
               label="Maximal fraction of reads carrying alternative allele at a reference allele homozygous genotype (--gty-af-ref)" />
        <param name="gty_af_het" type="float" value="0.25"
               label="Minimal fraction of reads carrying alternative allele at a heterozygous genotype (--gty-af-het)" />
        <param name="gty_af_alt" type="float" value="0.5"
               label="Minimal fraction of reads carrying alternative allele at an alternative allele homozygous genotype (--gty-af-alt)" />
      </when>
      <when value="no" />
    </conditional>

    <!-- Genetic inheritance (optional section).
         Two modes sit behind the nested genetic_model conditional:
         "yes" lets the user tick individual filtration codes, "no" offers
         preset models whose option value is the complete command-line flag. -->
    <conditional name="genetic_filters">
      <param name="genetic_filters_select" type="select"
             label="Specify genetic inheritance?">
        <option value="yes">Yes</option>
        <option value="no" selected="true">No</option>
      </param>
      <when value="yes">
        <conditional name="genetic_model">
          <param name="genetic_model_select" type="select"
                 label="Specify advanced settings for genetic inheritance?">
            <option value="yes">Yes</option>
            <option value="no" selected="true">No</option>
          </param>
          <!-- Advanced mode: free combination of filtration codes -->
          <when value="yes">
            <param name="custom_genetic_params" type="select" display="checkboxes" multiple="true"
                   label="Select genetic inheritance (advanced) (--genotype-filter)"
                   help="Multiple filtration codes have logical OR relationship">
              <option value="1">Exclude variants at which affected subjects have heterozygous genotypes (1)</option>
              <option value="2">Exclude variants at which both affected and unaffected subjects have the same homozygous genotypes (2)</option>
              <option value="3">Exclude variants at which affected subjects have reference homozygous genotypes (3)</option>
              <option value="4">Exclude variants at which both affected and unaffected subjects have the same heterozygous genotypes (4)</option>
              <option value="5">Exclude variants at which affected subjects have alternative homozygous genotypes (5)</option>
              <option value="6">Exclude variants at which affected family members have NO shared alleles (6)</option>
              <option value="7">ONLY include variants at which an offspring has one or two non-inherited alleles (7)</option>
            </param>
          </when>
          <!-- Preset mode: one inheritance model; the two double-hit values
               are also tested by the output filters -->
          <when value="no">
            <param name="suggested_genetic_params" type="select" display="radio"
                   label="Select genetic inheritance">
              <option value="--genotype-filter 1,2,3">Recessive (--genotype-filter 1,2,3)</option>
              <option value="--double-hit-gene-trio-filter" selected="true">Recessive and compound-heterozygous (--double-hit-gene-trio-filter)</option>
              <option value="--double-hit-gene-phased-filter">Recessive and compound-heterozygous with phased samples (--double-hit-gene-phased-filter)</option>
              <option value="--genotype-filter 2,3,4,5">Dominant (--genotype-filter 2,3,4,5)</option>
              <option value="--genotype-filter 7">De novo mutation (--genotype-filter 7)</option>
              <option value="--unique-gene-filter">Only genes on which EVERY affected subject has at least one case-unique alternative allele, but these alleles may be from different variants (--unique-gene-filter)</option>
            </param>
          </when>
        </conditional>
      </when>
      <when value="no" />
    </conditional>

    <!-- Gene feature filtering (optional section).
         Feature codes 0 to 5 are ticked by default. The command template
         emits this section only when at least one feature is selected. -->
    <conditional name="gene_feature_filters">
      <param name="gene_feature_filters_select" type="select"
             label="Specify gene feature filters?">
        <option value="yes">Yes</option>
        <option value="no" selected="true">No</option>
      </param>
      <when value="yes">
        <param name="gene_features" type="select" display="checkboxes" multiple="true"
               label="Select gene features (--gene-feature-in)"
               help="Variants falling outside the selected regions will be excluded">
          <!-- Selected by default -->
          <option value="0" selected="true">Frame-shift</option>
          <option value="1" selected="true">Loss of amino acids</option>
          <option value="2" selected="true">Loss of stop codon (TAG, TAA, TGA)</option>
          <option value="3" selected="true">Gain of stop codon (TAG, TAA, TGA)</option>
          <option value="4" selected="true">Missense</option>
          <option value="5" selected="true">Splicing</option>
          <!-- Not selected by default -->
          <option value="6">Synonymous</option>
          <option value="7">Exonic</option>
          <option value="8">5-UTR</option>
          <option value="9">3-UTR</option>
          <option value="10">Intronic</option>
          <option value="11">Upstream of transcription start site</option>
          <option value="12">Downstream of transcription end site</option>
          <option value="13">ncRNA</option>
          <option value="14">Intergenic</option>
          <option value="15">Unknown</option>
        </param>
        <!-- Window sizes used when classifying splicing and near-gene variants -->
        <param name="splicing" type="integer" value="2"
               label="Window size in base-pair around the splicing junction to flag the variant as splicing (--splicing)" />
        <param name="neargene" type="integer" value="1000"
               label="Size of region upstream and downstream (--neargene)" />
      </when>
      <when value="no" />
    </conditional>

    <!-- Allele frequency filtering (optional section).
         The database list may be left empty, in which case the command
         template passes only the rare allele frequency cutoff. -->
    <conditional name="allele_freq_filters">
      <param name="allele_freq_filters_select" type="select"
             label="Specify common variants filters?">
        <option value="yes">Yes</option>
        <option value="no" selected="true">No</option>
      </param>
      <when value="yes">
        <param name="allele_freq_db" type="select" display="checkboxes" multiple="true"
               label="Select databases for allelic frequency filtering (--db-filter)">
          <!-- 1000 Genomes Project releases -->
          <option value="hg19_1kg201305" selected="true">hg19_1kg201305: 1000 Genomes Project 2013 May release</option>
          <option value="hg19_1kg201204">hg19_1kg201204: 1000 Genomes Project 2012 April release</option>
          <!-- NHLBI ESP6500 populations -->
          <option value="hg19_ESP6500AA" selected="true">hg19_ESP6500AA: African American dataset from NHLBI GO Exome Sequencing Project (ESP6500)</option>
          <option value="hg19_ESP6500EA" selected="true">hg19_ESP6500EA: European American dataset from NHLBI GO Exome Sequencing Project (ESP6500)</option>
          <!-- dbSNP builds -->
          <option value="hg19_dbsnp141" selected="true">hg19_dbsnp141: dbSNP version 141</option>
          <option value="hg19_dbsnp138">hg19_dbsnp138: dbSNP version 138</option>
          <option value="hg19_dbsnp137">hg19_dbsnp137: dbSNP version 137</option>
        </param>
        <param name="rare_allele_freq" type="float" value="0.01"
               label="Minor allele frequency (MAF) for selecting rare variants (--rare-allele-freq)"
               help="Set to &gt;1 to only annotate MAF, without filtering" />
      </when>
      <when value="no" />
    </conditional>

    <!-- Variant type, region and gene filtering (optional section).
         The two selects carry the complete flag as their option value, with
         an empty value meaning "no flag". The four text boxes and the
         optional integer are passed only when filled in. -->
    <conditional name="genomic_region_filters">
      <param name="genomic_region_filters_select" type="select"
             label="Specify variant type, region and gene filtering?">
        <option value="yes">Yes</option>
        <option value="no" selected="true">No</option>
      </param>
      <when value="yes">
        <param name="ignore_indel_or_snv" type="select"
               label="Ignore indels or SNVs?">
          <option value="" selected="true">No</option>
          <option value="--ignore-indel">Ignore insertion and deletion sequence variants (indels) (--ignore-indel)</option>
          <option value="--ignore-snv">Ignore single nucleotide variants (SNVs) (--ignore-snv)</option>
        </param>
        <!-- Region include/exclude lists -->
        <param name="regions_in" type="text"
               label="Keep only variants within some genomic regions (--regions-in)"
               help="A comma-separated list of genomic regions, e.g. chrX,chrY:1-1000" />
        <param name="regions_out" type="text"
               label="Ignore variants within some genomic regions (--regions-out)"
               help="A comma-separated list of genomic regions, e.g. chrX,chrY:1-1000" />
        <!-- Gene include/exclude lists -->
        <param name="genes_in" type="text"
               label="Keep only variants within some genes (--genes-in)"
               help="A comma-separated list of gene symbols, e.g. TCF4,CNNM2,ANK3" />
        <param name="genes_out" type="text"
               label="Ignore variants within some genes (--genes-out)"
               help="A comma-separated list of gene symbols, e.g. TCF4,CNNM2,ANK3" />
        <param name="superdup" type="select"
               label="Annotate or filter out variants in super duplicate regions?"
               help="As defined in genomicSuperDups dataset from UCSC">
          <option value="" selected="true">No</option>
          <option value="--superdup-annot">Annotate variants in super duplicate regions (--superdup-annot)</option>
          <option value="--superdup-filter">Filter out variants in super duplicate regions (--superdup-filter)</option>
        </param>
        <param name="gene_var_filter" type="integer" value="" optional="true"
               label="Filter out genes with at least this number of putative pathogenic variants (--gene-var-filter)"
               help="As a rule of thumb, it is safe to set a cutoff of 4 or more. " />
      </when>
      <when value="no" />
    </conditional>

    <!-- Predicted impact filtering (optional section).
         When enabled, the command template always adds the dbnsfp score
         database, plus whichever of the two flags below are checked. -->
    <conditional name="impact_filters">
      <param name="impact_filters_select" type="select"
             label="Specify functional impact filters?">
        <option value="yes">Yes</option>
        <option value="no" selected="true">No</option>
      </param>
      <when value="yes">
        <param name="filter_nondisease_variant" type="boolean"
               truevalue="--filter-nondisease-variant" falsevalue="" checked="true"
               label="Filter out variants predicted to be non-disease causal (--filter-nondisease-variant)" />
        <param name="mendel_causing" type="boolean"
               truevalue="--mendel-causing-predict all" falsevalue="" checked="true"
               label="Predict Mendelian disease-causing variants by logistic regression model (--mendel-causing-predict)" />
      </when>
      <when value="no" />
    </conditional>

    <!-- Annotations (optional section).
         Three boolean flags, an optional PubMed text-mining search, and a
         nested conditional for shared interaction and pathway annotation
         against a user-supplied candidate gene list. -->
    <conditional name="add_annotations">
      <param name="add_annotations_select" type="select"
             label="Add annotations?">
        <option value="yes">Yes</option>
        <option value="no" selected="true">No</option>
      </param>
      <when value="yes">
        <param name="genome_annotation" type="boolean"
               truevalue="--genome-annot" falsevalue="" checked="true"
               label="Add genomic functional annotations (pseudogenes, TFBS, enhancer, UniProt) (--genome-annot)" />
        <param name="omim_annotation" type="boolean"
               truevalue="--omim-annot" falsevalue="" checked="true"
               label="Add OMIM annotation (--omim-annot)" />
        <param name="cosmic_annotation" type="boolean"
               truevalue="--cosmic-annot" falsevalue="" checked="true"
               label="Add COSMIC annotation (--cosmic-annot)" />
        <!-- PubMed mining: pubmed_type supplies the flag name and
             pubmed_mining_gene its argument; the command template skips
             both when the search terms are empty -->
        <param name="pubmed_type" type="select"
               label="Text mining in PubMed: find co-mentions of the search terms specified below with:">
          <option value="--pubmed-mining">The cytogenetic position of each variant (--pubmed-mining)</option>
          <option value="--pubmed-mining-gene">The gene in which each variant is located (--pubmed-mining-gene)</option>
        </param>
        <param name="pubmed_mining_gene" type="text"
               label="Text mining in PubMed: search term(s) of interest (e.g. disease name)"
               help="A comma-separated list of search terms, each composed by plus-separated words, e.g. spinocerebellar+ataxia,other+search+term. If empty, no search will be performed" />
        <!-- Shared protein-protein interactions and pathways -->
        <conditional name="shared_genes">
          <param name="shared_genes_select" type="select"
                 label="Add annotations for shared interactions/pathways?">
            <option value="yes">Yes</option>
            <option value="no" selected="true">No</option>
          </param>
          <when value="yes">
            <param name="candi_file" type="data" format="txt,tabular"
                   label="List of candidate genes of interest (--candi-file)" />
            <param name="ppi_depth" type="integer" value="1"
                   label="Maximum distance of a protein-protein interaction (PPI) between candidate genes and genes containing the variants (--ppi-depth)" />
            <param name="pathway_annot" type="select"
                   label="Select databases for the identification of shared pathways between candidate genes and genes containing the variants (--pathway-annot)"
                   help="Gene sets are extracted from MSigDB">
              <option value="cura" selected="true">Curated gene sets (4850)</option>
              <option value="cano">Canonical pathways (1452)</option>
              <option value="onco">Oncogenic signatures (189)</option>
              <option value="cmop">Computational gene sets (858)</option>
              <option value="onto">Gene Ontology gene sets (1454)</option>
            </param>
          </when>
          <when value="no" />
        </conditional>
      </when>
      <when value="no" />
    </conditional>
  </inputs>
  <!-- Output datasets. Files are picked up from the job working directory,
       where the command writes them under the "results" prefix. The four
       double-hit outputs are created only when the matching preset
       inheritance model is selected. The log dataset receives the standard
       output of the command. -->
  <outputs>
    <!-- Always produced: filtered VCF and tabular report -->
    <data name="outVcf" format="vcf"
          label="${tool.name} on ${on_string}: VCF"
          from_work_dir="results.flt.vcf" />
    <data name="outTabular" format="tabular"
          label="${tool.name} on ${on_string}: tabular"
          from_work_dir="results.flt.txt" />
    <!-- Trio-based double-hit gene outputs -->
    <data name="outDoubleHitTriosGty" format="tabular"
          label="${tool.name} on ${on_string}: double-hit genotypes"
          from_work_dir="results.doublehit.gene.trios.flt.gty.txt">
      <filter>genetic_filters['genetic_filters_select'] == "yes" and genetic_filters['genetic_model']['genetic_model_select'] == "no" and genetic_filters['genetic_model']['suggested_genetic_params'] == "--double-hit-gene-trio-filter"</filter>
    </data>
    <data name="outDoubleHitTriosCount" format="tabular"
          label="${tool.name} on ${on_string}: double-hit counts"
          from_work_dir="results.doublehit.gene.trios.flt.count.txt">
      <filter>genetic_filters['genetic_filters_select'] == "yes" and genetic_filters['genetic_model']['genetic_model_select'] == "no" and genetic_filters['genetic_model']['suggested_genetic_params'] == "--double-hit-gene-trio-filter"</filter>
    </data>
    <!-- Phased double-hit gene outputs -->
    <data name="outDoubleHitPhasedGty" format="tabular"
          label="${tool.name} on ${on_string}: double-hit phased genotypes"
          from_work_dir="results.doublehit.gene.phased.flt.gty.txt">
      <filter>genetic_filters['genetic_filters_select'] == "yes" and genetic_filters['genetic_model']['genetic_model_select'] == "no" and genetic_filters['genetic_model']['suggested_genetic_params'] == "--double-hit-gene-phased-filter"</filter>
    </data>
    <data name="outDoubleHitPhasedCount" format="tabular"
          label="${tool.name} on ${on_string}: double-hit phased counts"
          from_work_dir="results.doublehit.gene.phased.flt.count.txt">
      <filter>genetic_filters['genetic_filters_select'] == "yes" and genetic_filters['genetic_model']['genetic_model_select'] == "no" and genetic_filters['genetic_model']['suggested_genetic_params'] == "--double-hit-gene-phased-filter"</filter>
    </data>
    <!-- Target of the shell redirection at the end of the command -->
    <data name="logFile" format="txt"
          label="${tool.name} on ${on_string}: log" />
  </outputs>
  <help>
**What it does**

This tool uses `KGGSeq`_ to filter and prioritize genetic variants from sequencing data.

**License and citation**

This Galaxy tool is Copyright © 2013-2014 `CRS4 Srl.`_ and is released under the `MIT license`_.

.. _CRS4 Srl.: http://www.crs4.it/
.. _MIT license: http://opensource.org/licenses/MIT

You can use this tool only if you agree to the license terms of: `KGGSeq`_.

.. _KGGSeq: http://statgenpro.psychiatry.hku.hk/limx/kggseq/

If you use this tool, please cite:

- |Cuccuru2014|_
- |Li2012|_
- |Li2013|_

.. |Cuccuru2014| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2014) Orione, a web-based framework for NGS analysis in microbiology. *Bioinformatics* 30(13), 1928-1929
.. _Cuccuru2014: http://bioinformatics.oxfordjournals.org/content/30/13/1928
.. |Li2012| replace:: Li, M.-X., *et al.* (2012) A comprehensive framework for prioritizing variants in exome sequencing studies of Mendelian diseases. *Nucleic Acids Res.* 40(7), e53
.. _Li2012: http://nar.oxfordjournals.org/content/40/7/e53
.. |Li2013| replace:: Li, M.-X., *et al.* (2013) Predicting Mendelian Disease-Causing Non-Synonymous Single Nucleotide Variants in Exome Sequencing Studies. *PLoS Genet.* 9(1), e1003143
.. _Li2013: http://www.plosgenetics.org/article/info:doi/10.1371/journal.pgen.1003143
  </help>
</tool>