Mercurial > repos > crs4 > kggseq_variant_selection
changeset 1:e9758eee6697 draft
Update to KggSeq v0.7_20150118
author | crs4 |
---|---|
date | Tue, 28 Apr 2015 04:42:14 -0400 |
parents | d388273fb83f |
children | e1a21c2f4997 |
files | COPYING kggseq_variant_selection.xml tool_dependencies.xml |
diffstat | 3 files changed, 99 insertions(+), 81 deletions(-) [+] |
line wrap: on
line diff
--- a/COPYING Fri Sep 12 21:40:16 2014 -0400 +++ b/COPYING Tue Apr 28 04:42:14 2015 -0400 @@ -1,7 +1,7 @@ -Copyright © 2013-2014 CRS4 Srl. http://www.crs4.it/ +Copyright © 2013-2015 CRS4 Srl. http://www.crs4.it/ Created by: Paolo Uva <paolo.uva@crs4.it> -Nicola Soranzo <nicola.soranzo@crs4.it> +Nicola Soranzo <nicola.soranzo@tgac.ac.uk> Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the
--- a/kggseq_variant_selection.xml Fri Sep 12 21:40:16 2014 -0400 +++ b/kggseq_variant_selection.xml Tue Apr 28 04:42:14 2015 -0400 @@ -1,7 +1,7 @@ -<tool id="kggseq_variant_selection" name="Variant selection with KGGSeq" version="1.1"> +<tool id="kggseq_variant_selection" name="Variant selection with KGGSeq" version="1.2"> <description></description> <requirements> - <requirement type="package" version="0.4_20140910">kggseq</requirement> + <requirement type="package" version="0.7_20150118">kggseq</requirement> </requirements> <command> java -jar \$KGGSEQ_JAR_PATH/kggseq.jar @@ -19,49 +19,37 @@ --db-gene $db_gene $composite_subject_id -## Variant filters +## Variant and genotype filters $pass_variant_only -#if str($variant_filters.variant_filters_select) == "yes" - --seq-qual $variant_filters.seq_qual - --seq-mq $variant_filters.seq_mq - --seq-sb $variant_filters.seq_sb - --seq-fs $variant_filters.seq_fs - --min-heta $variant_filters.min_heta - --min-homa $variant_filters.min_homa - --min-hetu $variant_filters.min_hetu - --min-homu $variant_filters.min_homu - --min-obsa $variant_filters.min_obsa - --min-obsu $variant_filters.min_obsu - --min-obs $variant_filters.min_obs - #if str($variant_filters.hwe_control) - --hwe-control $variant_filters.hwe_control - #end if - #if str($variant_filters.hwe_case) - --hwe-case $variant_filters.hwe_case +#if str($variant_genotype_filters.variant_genotype_filters_select) == "yes" + --seq-qual $variant_genotype_filters.seq_qual + --seq-mq $variant_genotype_filters.seq_mq + --seq-sb $variant_genotype_filters.seq_sb + --seq-fs $variant_genotype_filters.seq_fs + --min-heta $variant_genotype_filters.min_heta + --min-homa $variant_genotype_filters.min_homa + --min-hetu $variant_genotype_filters.min_hetu + --min-homu $variant_genotype_filters.min_homu + --min-obsa $variant_genotype_filters.min_obsa + --min-obsu $variant_genotype_filters.min_obsu + --min-obs $variant_genotype_filters.min_obs + #if str($variant_genotype_filters.hwe_control) + --hwe-control $variant_genotype_filters.hwe_control #end if - #if str($variant_filters.hwe_all) - --hwe-all $variant_filters.hwe_all + #if str($variant_genotype_filters.hwe_case) + --hwe-case $variant_genotype_filters.hwe_case + #end if + #if str($variant_genotype_filters.hwe_all) + --hwe-all $variant_genotype_filters.hwe_all #end if + --gty-qual $variant_genotype_filters.gty_qual + --gty-dp $variant_genotype_filters.gty_dp + --gty-sec-pl $variant_genotype_filters.gty_sec_pl + --gty-af-ref $variant_genotype_filters.gty_af_ref + --gty-af-het $variant_genotype_filters.gty_af_het + --gty-af-alt $variant_genotype_filters.gty_af_alt #else - --seq-qual 0 - --seq-mq 0 -#end if - -## Genotype filters -#if str($genotype_filters.genotype_filters_select) == "yes" - --gty-qual $genotype_filters.gty_qual - --gty-dp $genotype_filters.gty_dp - --gty-sec-pl $genotype_filters.gty_sec_pl - --gty-af-ref $genotype_filters.gty_af_ref - --gty-af-het $genotype_filters.gty_af_het - --gty-af-alt $genotype_filters.gty_af_alt -#else - --gty-qual 0 - --gty-dp 0 - --gty-sec-pl 0 - --gty-af-ref 1 - --gty-af-het 0 - --gty-af-alt 0 + --no-qc #end if ## Genetic inheritance @@ -75,6 +63,19 @@ #end if #end if +## Homozygosity - IBS - IBD filters +#if str($hom_ibs_ibd_filters.hom_ibs_ibd_filters_select) == "yes" + #if str($hom_ibs_ibd_filters.homozygosity_case_filter) + --homozygosity-case-filter $hom_ibs_ibd_filters.homozygosity_case_filter + #end if + #if str($hom_ibs_ibd_filters.ibs_case_filter) + --ibs-case-filter $hom_ibs_ibd_filters.ibs_case_filter + #end if + #if str($hom_ibs_ibd_filters.ibd_annot) != 'None' + --ibd-annot $hom_ibs_ibd_filters.ibd_annot + #end if +#end if + ## Gene feature filters #if str($gene_feature_filters.gene_feature_filters_select) == "yes" and $gene_feature_filters.gene_features --gene-feature-in $gene_feature_filters.gene_features @@ -120,6 +121,9 @@ ## Add annotations #if str($add_annotations.add_annotations_select) == "yes" + #if str($add_annotations.o_flanking_seq) + --o-flanking-seq $add_annotations.o_flanking_seq + #end if $add_annotations.genome_annotation $add_annotations.omim_annotation $add_annotations.cosmic_annotation @@ -138,18 +142,19 @@ <inputs> <param name="inputFile" type="data" format="vcf" label="VCF Variant file (--vcf-file)" help="Coordinates must refer to hg19" /> <param name="pedFile" type="data" format="tabular" label="Pedigree (--ped-file)" /> + <param name="composite_subject_id" type="boolean" truevalue="--composite-subject-id" falsevalue="" checked="false" label="Composite subject ID (--composite-subject-id)" /> <param name="db_gene" type="select" display="checkboxes" multiple="true" label="Database(s) to annotate and filter variants (--db-gene)"> <option value="refgene" selected="true">refgene: The RefGene database compiled by UCSC from hg19 refGene. Note: RefSeq has NO mitochondria gene definition</option> - <option value="gencode">gencode: The GENCODE gene sets. Note: GECODE contains similar number of coding genes but more transcripts than RefGene. It HAS the mitochondria gene definition</option> + <option value="gencode">gencode: The GENCODE gene sets. Note: GENCODE contains similar number of coding genes but more transcripts than RefGene. It HAS the mitochondria gene definition</option> <option value="knowngene">knowngene: The UCSC knonwGene datasase compiled by UCSC from hg19 knownGene</option> + <option value="ensembl">ensembl: The Ensembl gene datasase compiled by UCSC from hg19 ensGene</option> <validator type="no_options" message="Select at least one database" /> </param> - <param name="composite_subject_id" type="boolean" truevalue="--composite-subject-id" falsevalue="" checked="false" label="Composite subject ID (--composite-subject-id)" /> <param name="pass_variant_only" type="boolean" truevalue="--vcf-filter-in PASS" falsevalue="" checked="true" label="Don't include filtered loci (--vcf-filter-in PASS)" help="Keep only variants with FILTER equal to PASS" /> - <!-- Variant quality control --> - <conditional name="variant_filters"> - <param name="variant_filters_select" type="select" label="Specify variant quality filters?"> + <!-- Variant and genotype quality control --> + <conditional name="variant_genotype_filters"> + <param name="variant_genotype_filters_select" type="select" label="Specify variant and genotype quality filters?"> <option value="yes">Yes</option> <option value="no" selected="true">No</option> </param> @@ -158,27 +163,16 @@ <param name="seq_mq" type="integer" value="20" label="Minimum overall Phred-scaled mapping quality score for the variant (--seq-mq)" /> <param name="seq_sb" type="float" value="-10" label="Maximal overall strand bias score for the variant (--seq-sb)" /> <param name="seq_fs" type="integer" value="60" label="Maximal overall strand bias Phred-scaled p-value (using Fisher's exact test) for the variant (--seq-fs)" /> - <param name="min_heta" type="integer" value="1" label="Minimal observed number of heterozygote genotypes in cases (the affected) (--min-heta)" /> - <param name="min_homa" type="integer" value="1" label="Minimal observed number of alternate homozygote genotypes in cases (the affected) (--min-homa)" /> - <param name="min_hetu" type="integer" value="1" label="Minimal observed number of heterozygote genotypes in controls (the unaffected) (--min-hetu)" /> - <param name="min_homu" type="integer" value="1" label="Minimal observed number of alternate homozygote genotypes in controls (the unaffected) (--min-homu)" /> - <param name="min_obsa" type="integer" value="1" label="Minimal observed number of non-missing genotypes in cases (the affected) (--min-obsa)" /> - <param name="min_obsu" type="integer" value="1" label="Minimal observed number of non-missing genotypes in controls (the unaffected) (--min-obsu)" /> - <param name="min_obs" type="integer" value="2" label="Minimal observed number of non-missing genotypes in all samples (--min-obs)" /> + <param name="min_heta" type="integer" value="0" label="Minimal observed number of heterozygote genotypes in cases (the affected) (--min-heta)" /> + <param name="min_homa" type="integer" value="0" label="Minimal observed number of alternate homozygote genotypes in cases (the affected) (--min-homa)" /> + <param name="min_hetu" type="integer" value="0" label="Minimal observed number of heterozygote genotypes in controls (the unaffected) (--min-hetu)" /> + <param name="min_homu" type="integer" value="0" label="Minimal observed number of alternate homozygote genotypes in controls (the unaffected) (--min-homu)" /> + <param name="min_obsa" type="integer" value="0" label="Minimal observed number of non-missing genotypes in cases (the affected) (--min-obsa)" /> + <param name="min_obsu" type="integer" value="0" label="Minimal observed number of non-missing genotypes in controls (the unaffected) (--min-obsu)" /> + <param name="min_obs" type="integer" value="1" label="Minimal observed number of non-missing genotypes in all samples (--min-obs)" /> <param name="hwe_control" type="float" value="" optional="true" label="Exclude variants in controls with the Hardy-Weinberg test p-value <= this value (--hwe-control)" /> <param name="hwe_case" type="float" value="" optional="true" label="Exclude variants in cases with the Hardy-Weinberg test p-value <= this value (--hwe-case)" /> <param name="hwe_all" type="float" value="" optional="true" label="Exclude variants in all subjects with the Hardy-Weinberg test p-value <= this value (--hwe-all)" /> - </when> - <when value="no" /> - </conditional> - - <!-- Genotype quality control --> - <conditional name="genotype_filters"> - <param name="genotype_filters_select" type="select" label="Specify genotype quality filters?"> - <option value="yes">Yes</option> - <option value="no" selected="true">No</option> - </param> - <when value="yes"> <param name="gty_qual" type="integer" value="10" label="Minimum Phred-scaled genotyping quality (--gty-qual)" /> <param name="gty_dp" type="integer" value="4" label="Minimal read depth per genotype (--gty-dp)" /> <param name="gty_sec_pl" type="integer" value="20" label="Minimal value for second smallest normalized Phred-scaled genotype quality (--gty-sec-pl)" /> @@ -229,6 +223,20 @@ <when value="no" /> </conditional> + <!-- Homozygosity filtering --> + <conditional name="hom_ibs_ibd_filters"> + <param name="hom_ibs_ibd_filters_select" type="select" label="Specify homozygosity filters?"> + <option value="yes">Yes</option> + <option value="no" selected="true">No</option> + </param> + <when value="yes"> + <param name="homozygosity_case_filter" type="integer" value="" optional="true" label="Filter by Runs of Homozygosity (ROH) (--homozygosity-case-filter)" help="Minimal length (in kb) of consecutive homozygous genotype for each interesting variant" /> + <param name="ibs_case_filter" type="integer" value="" optional="true" label="Filter by Identical by State (IBS) (--ibs-case-filter)" help="Minimal length (in kb) of the region in which there is at least one allele identical among all cases" /> + <param name="ibd_annot" type="data" format="txt,tabular,bed" optional="true" label="Add Identical by Descent (IBD) annotation (--ibd-annot)" help="File with IBD or significant linkage regions. Variants within these regions will be highlighted. Note: title line CHR START END is needed" /> + </when> + <when value="no" /> + </conditional> + <!-- Gene feature filtering --> <conditional name="gene_feature_filters"> <param name="gene_feature_filters_select" type="select" label="Specify gene feature filters?"> @@ -252,29 +260,37 @@ <option value="12">Downstream of transcription end site</option> <option value="13">ncRNA</option> <option value="14">Intergenic</option> - <option value="15">Unknown</option> + <option value="15">Monomorphic</option> + <option value="16">Unknown</option> </param> <param name="splicing" type="integer" value="2" label="Window size in base-pair around the splicing junction to flag the variant as splicing (--splicing)" /> - <param name="neargene" type="integer" value="1000" label="Size of region upstream and downstream (--neargene)" /> + <param name="neargene" type="integer" value="1000" label="Size in base-pair of region upstream and downstream (--neargene)" /> </when> <when value="no" /> </conditional> <!-- Allele frequency filtering --> <conditional name="allele_freq_filters"> - <param name="allele_freq_filters_select" type="select" label="Specify common variants filters?"> + <param name="allele_freq_filters_select" type="select" label="Specify filters by allele frequency?"> <option value="yes">Yes</option> <option value="no" selected="true">No</option> </param> <when value="yes"> <param name="allele_freq_db" type="select" display="checkboxes" multiple="true" label="Select databases for allelic frequency filtering (--db-filter)"> - <option value="hg19_1kg201305" selected="true">hg19_1kg201305: 1000 Genomes Project 2013 May release</option> - <option value="hg19_1kg201204">hg19_1kg201204: 1000 Genomes Project 2012 April release</option> - <option value="hg19_ESP6500AA" selected="true">hg19_ESP6500AA: African American dataset from NHLBI GO Exome Sequencing Project (ESP6500)</option> - <option value="hg19_ESP6500EA" selected="true">hg19_ESP6500EA: European American dataset from NHLBI GO Exome Sequencing Project (ESP6500)</option> - <option value="hg19_dbsnp141" selected="true">hg19_dbsnp141: dbSNP version 141</option> - <option value="hg19_dbsnp138">hg19_dbsnp138: dbSNP version 138</option> - <option value="hg19_dbsnp137">hg19_dbsnp137: dbSNP version 137</option> + <option value="1kg201305" selected="true">1KG 201305: 1000 Genomes Project 2013 May release</option> + <option value="1kg201204">1KG 201204: 1000 Genomes Project 2012 April release</option> + <option value="1kgafr201204">1KG 201204 AFR: 1000 Genomes Project 2012 April release - African</option> + <option value="1kgeur201204">1KG 201204 EUR: 1000 Genomes Project 2012 April release - European</option> + <option value="1kgamr201204">1KG 201204 AMR: 1000 Genomes Project 2012 April release - Mixed American</option> + <option value="1kgasn201204">1KG 201204 ASN: 1000 Genomes Project 2012 April release - Asian</option> + <option value="ESP6500AA" selected="true">ESP6500AA: African American dataset from NHLBI GO Exome Sequencing Project (ESP6500)</option> + <option value="ESP6500EA" selected="true">ESP6500EA: European American dataset from NHLBI GO Exome Sequencing Project (ESP6500)</option> + <option value="dbsnp141" selected="true">dnSNP 141</option> + <option value="dbsnp138">dbSNP 138</option> + <option value="dbsnp138nf">dbSNP 138nf: dbSNP version 138 without the flagged SNPs by UCSC. Flagged SNPs include SNPs clinically associated by dbSNP, mapped to a single location in the reference genome assembly, and not known to have a minor allele frequency of at least 1%</option> + <option value="dbsnp137">dbSNP 137</option> + <option value="dbsnp135">dbSNP 135</option> + <option value="exac">Exome Aggregation Consortium (ExAC): Variants from 61,486 unrelated individuals sequenced as part of various disease-specific and population genetic studies</option> </param> <param name="rare_allele_freq" type="float" value="0.01" label="Minor allele frequency (MAF) for selecting rare variants (--rare-allele-freq)" help="Set to >1 to only annotate MAF, without filtering" /> </when> @@ -314,7 +330,7 @@ <option value="no" selected="true">No</option> </param> <when value="yes"> - <param name="filter_nondisease_variant" type="boolean" truevalue="--filter-nondisease-variant" falsevalue="" checked="true" label="Filter out variants predicted to be non-disease causal (--filter-nondisease-variant)" /> + <param name="filter_nondisease_variant" type="boolean" truevalue="--filter-nondisease-variant" falsevalue="" checked="false" label="Filter out variants predicted to be non-disease causal (--filter-nondisease-variant)" /> <param name="mendel_causing" type="boolean" truevalue="--mendel-causing-predict all" falsevalue="" checked="true" label="Predict Mendelian disease-causing variants by logistic regression model (--mendel-causing-predict)" /> </when> <when value="no" /> @@ -327,10 +343,11 @@ <option value="no" selected="true">No</option> </param> <when value="yes"> + <param name="o_flanking_seq" type="integer" value="" label="Size in bp of flanking sequence to extract (--o-flanking-seq)" /> <param name="genome_annotation" type="boolean" truevalue="--genome-annot" falsevalue="" checked="true" label="Add genomic functional annotations (presudogenes, TFBS, enhancer, UniProt) (--genome-annot)" /> <param name="omim_annotation" type="boolean" truevalue="--omim-annot" falsevalue="" checked="true" label="Add OMIM annotation (--omim-annot)" /> <param name="cosmic_annotation" type="boolean" truevalue="--cosmic-annot" falsevalue="" checked="true" label="Add COSMIC annotation (--cosmic-annot)" /> - <param name="pubmed_type" type="select" label="Text mining in PubMed: find co-mentions of the search terms specified below with:"> + <param name="pubmed_type" type="select" label="Text mining in PubMed: find co-mentions of the search terms specified below with"> <option value="--pubmed-mining">The cytogenetic position of each variant (--pubmed-mining)</option> <option value="--pubmed-mining-gene">The gene in which each variant is located (--pubmed-mining-gene)</option> </param>
--- a/tool_dependencies.xml Fri Sep 12 21:40:16 2014 -0400 +++ b/tool_dependencies.xml Tue Apr 28 04:42:14 2015 -0400 @@ -1,11 +1,12 @@ <?xml version="1.0"?> <tool_dependency> - <package name="kggseq" version="0.4_20140910"> + <package name="kggseq" version="0.7_20150118"> <install version="1.0"> <actions> - <action type="download_by_url" target_filename="kggseq_archive-0.4_20140910.tar.gz">https://github.com/nsoranzo/kggseq_archive/archive/v0.4_20140910.tar.gz</action> + <action type="download_by_url" target_filename="kggseq_archive-0.7_20150118.tar.gz">https://github.com/crs4/kggseq_archive/archive/v0.7_20150118.tar.gz</action> <action type="shell_command">touch test.vcf</action> - <action type="shell_command">java -jar kggseq.jar --no-lib-check --resource resources --buildver hg19 --db-filter hg19_1kg201305,hg19_1kg201204,hg19_ESP6500AA,hg19_ESP6500EA,hg19_dbsnp141,hg19_dbsnp138,hg19_dbsnp137 --genome-annot --db-gene refgene,gencode,knowngene --db-score dbnsfp --superdup-annot --vcf-file test.vcf</action> + <action type="shell_command">java -jar kggseq.jar --no-lib-check --resource resources --buildver hg19 --db-filter 1kg201305,1kg201204,1kgafr201204,1kgeur201204,1kgamr201204,1kgasn201204,dbsnp135,dbsnp137,dbsnp138,dbsnp138nf,dbsnp141,ESP6500AA,ESP6500EA,exac --genome-annot --db-gene refgene,gencode,knowngene,ensembl --db-score dbnsfp --superdup-annot --cosmic-annot --vcf-file test.vcf</action> + <action type="shell_command">java -Xmx4g -jar kggseq.jar --no-lib-check --no-qc --resource resources --buildver hg19 --o-flanking-seq 10 --vcf-file examples/rare.disease.hg19.vcf</action> <action type="move_directory_files"> <source_directory>.</source_directory> <destination_directory>$INSTALL_DIR</destination_directory>