Mercurial > repos > devteam > varscan_version_2
changeset 0:6f8cead3dc93 draft
Initial commit of all needed files.
author | devteam |
---|---|
date | Sun, 17 Nov 2013 11:08:56 -0500 |
parents | |
children | 44d514f3df8f |
files | test-data/test_in1.pileup test-data/test_out1.vcf tool_dependencies.xml varscan_mpileup.xml |
diffstat | 4 files changed, 213 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_in1.pileup Sun Nov 17 11:08:56 2013 -0500 @@ -0,0 +1,13 @@ +chr1 10087 a 41 ..+1C...........,,,,,.....C..C,...,..,,..c,.+1C A9D<<#C#<(C9ACAFF<?D>=(#;D#;AF=7898.GJ'6I +chr1 10088 c 41 .......T.....,,,,,.........,..A,..,,..,,. 2BBB<#B#A5?5?GJ;JD?@A?;#6B#HEG)GBBIB>IEE! +chr1 10089 c 41 .............,,,,,.........,...,..,,..,,. 8BBB<#D#?5A9AHJ;IFBBB??#6A#FEG8C=>H?FJ@C$ +chr1 10090 c 41 .............,,,,,.........,...,..,,..,,. 2#AB(#9#9(A<<GJEHEBBDA?#;;#D?H@5=CF1HJ;D' +chr1 10091 t 41 .............,,,,,A........,...,..,,..,,. (#(?5#,#<(?<?FIAF;(D=B9#;@#?#G8-=F=<CJ6BI +chr1 10092 a 41 .............,,,,,.........,...,..,,..,,. 9#9C<#5#9,5(<FH:GE9C?<?#3A#:#A@B8CC5EI=?J +chr1 10093 a 41 .............,,,,,.........,...c..,c..,,. ?#<DC#9#99?39CFEFC?D<(C#,C#1#HE'7G8'>I);J +chr1 10094 c 41 .............,,,,,.....G...,...,..,,..,,T ?#?B?#?#<8A8ABJBIG9D?9<#,?#F#IHH(AIGEHGH6 +chr1 10095 c 41 A............,,,,,.........,...,..,,..,,. ##8B<#A#??B8<EJCJB7BAAA#5B#>#>=F(@HHCIBH6 +chr1 10096 c 41 .............,,,,,........A,...,..,,..,,. ##ADA#B#18B?AHI<IE<AA?D#<;#C#E6B=FGCCIBE9 +chr1 10097 t 41 .............,,,,,.........,..C,..,,..-1A,,-1a. ##(B8#B#8(BB?AFAH:(B2??#<?#8#E'6;;B6@J8@J +chr1 10098 a 41 .............,,,,,.........,...,..,,.*,*. ##3C9#3#9999AFH2H;9D<C?#C3#?#E5?DAD@D!=$J +chr1 10099 a 41 .............,,,,,.........,...c..,,..,,. ##<D<#9#<9?<9FD+DF9CAA?#39#1#H?)C>9.D!.$J
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_out1.vcf Sun Nov 17 11:08:56 2013 -0500 @@ -0,0 +1,26 @@ +##fileformat=VCFv4.1 +##source=VarScan2 +##INFO=<ID=ADP,Number=1,Type=Integer,Description="Average per-sample depth of bases with Phred score >= 15"> +##INFO=<ID=WT,Number=1,Type=Integer,Description="Number of samples called reference (wild-type)"> +##INFO=<ID=HET,Number=1,Type=Integer,Description="Number of samples called heterozygous-variant"> +##INFO=<ID=HOM,Number=1,Type=Integer,Description="Number of samples called homozygous-variant"> +##INFO=<ID=NC,Number=1,Type=Integer,Description="Number of samples not called"> +##FILTER=<ID=str10,Description="Less than 10% or more than 90% of variant supporting reads on one strand"> +##FILTER=<ID=indelError,Description="Likely artifact due to indel reads at this position"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> +##FORMAT=<ID=SDP,Number=1,Type=Integer,Description="Raw Read Depth as reported by SAMtools"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Quality Read Depth of bases with Phred score >= 15"> +##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Depth of reference-supporting bases (reads1)"> +##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Depth of variant-supporting bases (reads2)"> +##FORMAT=<ID=FREQ,Number=1,Type=String,Description="Variant allele frequency"> +##FORMAT=<ID=PVAL,Number=1,Type=String,Description="P-value from Fisher's Exact Test"> +##FORMAT=<ID=RBQ,Number=1,Type=Integer,Description="Average quality of reference-supporting bases (qual1)"> +##FORMAT=<ID=ABQ,Number=1,Type=Integer,Description="Average quality of variant-supporting bases (qual2)"> +##FORMAT=<ID=RDF,Number=1,Type=Integer,Description="Depth of reference-supporting bases on forward strand (reads1plus)"> +##FORMAT=<ID=RDR,Number=1,Type=Integer,Description="Depth of reference-supporting bases on reverse strand 
(reads1minus)"> +##FORMAT=<ID=ADF,Number=1,Type=Integer,Description="Depth of variant-supporting bases on forward strand (reads2plus)"> +##FORMAT=<ID=ADR,Number=1,Type=Integer,Description="Depth of variant-supporting bases on reverse strand (reads2minus)"> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample1 +chr1 10087 . A AC . PASS ADP=33;WT=0;HET=1;HOM=0;NC=0 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR 0/1:0:41:33:31:2:6.06%:9.8E-1:30:32:22:9:2:0 +chr1 10097 . TA T . PASS ADP=30;WT=0;HET=1;HOM=0;NC=0 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR 0/1:0:41:30:28:2:6.67%:9.8E-1:29:36:18:10:1:1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Sun Nov 17 11:08:56 2013 -0500 @@ -0,0 +1,19 @@ +<?xml version="1.0"?> +<tool_dependency> + <!-- Install recipe for the varscan 2.3.6 package; varscan_mpileup.xml declares a requirement with the same name and version. --> + <package name="varscan" version="2.3.6"> + <install version="1.0"> + <actions> + <!-- Fetch the VarScan jar from SourceForge. --> + <action type="download_by_url">http://downloads.sourceforge.net/project/varscan/VarScan.v2.3.6.jar</action> + <!-- Place the downloaded jar under $INSTALL_DIR/jars. --> + <action type="move_file"> + <source>VarScan.v2.3.6.jar</source> + <destination>$INSTALL_DIR/jars</destination> + </action> + <!-- Point JAVA_JAR_PATH at the jar directory. The command in varscan_mpileup.xml runs $JAVA_JAR_PATH/VarScan.v2.3.6.jar, so the jar file name here has to stay in step with it. --> + <action type="set_environment"> + <environment_variable name="JAVA_JAR_PATH" action="set_to">$INSTALL_DIR/jars</environment_variable> + </action> + </actions> + </install> + <readme> + </readme> + </package> +</tool_dependency>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/varscan_mpileup.xml Sun Nov 17 11:08:56 2013 -0500 @@ -0,0 +1,155 @@ +<tool id="varscan" name="Varscan" version="0.1"> + <description>for variant detection</description> + + <!-- + Galaxy wrapper around the VarScan 2.3.6 mpileup2snp, mpileup2indel and mpileup2cns commands. + Takes one pileup dataset and writes the calls to a single VCF dataset. + The jar is looked up through JAVA_JAR_PATH, which tool_dependencies.xml sets for the varscan 2.3.6 package. + --> + + <requirements> + <requirement type="package" version="2.3.6">varscan</requirement> + </requirements> + + <!-- + The version command string is not yet a template that can be filled in, so version command is not yet possible. + <version_command>java -jar ${GALAXY_DATA_INDEX_DIR}/shared/jars/varscan/VarScan.jar 2>&1 | head -n 1</version_command> + --> + + <command> + ## Set up samples list file: split the comma-separated names into one name per line. + ## The value is quoted so that whitespace and shell glob characters in a name reach awk unchanged. + #if $sample_names.strip() != '': + echo "${sample_names}" | awk -F ',' '{ for (i = 1; i <= NF; i++) { print \$i; } }' > samples_list.txt; + #end if + + ## Set up command + input. + java -jar \$JAVA_JAR_PATH/VarScan.v2.3.6.jar ${cmd} ${input} + --min-coverage ${min_coverage} + --min-reads2 ${min_supporting_reads} + --min-avg-qual ${min_avg_qual} + --min-var-freq ${min_var_freq} + --min-freq-for-hom ${min_freq_for_hom} + --p-value ${p_value} + + ## Always pass the strand filter setting explicitly. The VarScan default listed in the help below is 1 (filter on), + ## so leaving the option out when the user picks 'no' would not switch the filter off. + #if str($strand_filter) == 'yes': + --strand-filter 1 + #else + --strand-filter 0 + #end if + + ## Report only variants in consensus. + #if str($cmd) == 'mpileup2cns': + --variants + #end if + + ## Set up outputs.
+ --output-vcf 1 + + #if $sample_names.strip() != '': + --vcf-sample-list samples_list.txt + #end if + + ## Redirect last so that every VarScan option is visibly part of the command before stdout is captured. + > $output + </command> + + <inputs> + <param format="pileup" name="input" type="data" label="Pileup dataset" help=""/> + + <param name="cmd" type="select" label="Analysis type"> + <option value="mpileup2snp" selected="True">single nucleotide variation</option> + <option value="mpileup2indel">insertions and deletions</option> + <option value="mpileup2cns">consensus genotype</option> + </param> + + <param name="min_coverage" type="integer" value="8" min="1" max="200" label="Minimum read depth" help="Minimum depth at a position to make a call"/> + <param name="min_supporting_reads" type="integer" value="2" min="1" max="200" label="Minimum supporting reads" help="Minimum supporting reads at a position to make a call"/> + <param name="min_avg_qual" type="integer" value="15" min="1" max="50" label="Minimum base quality at a position to count a read"/> + <param name="min_var_freq" type="float" value="0.01" min="0" max="1" label="Minimum variant allele frequency threshold"/> + <param name="min_freq_for_hom" type="float" value="0.75" min="0" max="1" label="Minimum frequency to call homozygote"/> + <param name="p_value" type="float" value="0.99" min="0" max="1" label="p-value threshold for calling variants"/> + <param name="strand_filter" type="select" label="Ignore variants with >90% support on one strand"> + <option value="no" selected="True">no</option> + <option value="yes">yes</option> + </param> + <param name="sample_names" type="text" value="" label="Sample names" help="Separate sample names by comma; leave blank to use default sample names."/> + </inputs> + + <stdio> + <regex match="Exception" source="both" level="fatal" description="Tool exception"/> + <regex match=".*" source="both" level="log" description="tool progress"/> + </stdio> + + <outputs> + <data name="output" format="vcf"/> + </outputs> + + <trackster_conf> + </trackster_conf> + + <tests> + <test> + <param name="input" value="test_in1.pileup" /> + <param name="cmd" value="mpileup2cns" /> + <param name="min_coverage" value="8" /> + <param name="min_supporting_reads" value="2" /> + <param name="min_avg_qual" value="15" /> + <param name="min_var_freq" value="0.01" /> + <param name="min_freq_for_hom" value="0.75" /> + <param name="p_value" value="0.99" /> + <param name="strand_filter" value="no" /> + <param name="sample_names" value="" /> + <output name="output" file="test_out1.vcf" lines_diff="0" /> + </test> + </tests> + + <help> +**VarScan Overview** + +VarScan_ performs variant detection for massively parallel sequencing data, such as exome, WGS, and transcriptome data. It calls variants from an mpileup dataset and produces a VCF 4.1 dataset. Full documentation is available online_. + +Please cite: Koboldt, D., Zhang, Q., Larson, D., Shen, D., McLellan, M., Lin, L., Miller, C., Mardis, E., Ding, L., and Wilson, R. (2012). VarScan 2: Somatic mutation and copy number alteration discovery in cancer by exome sequencing. Genome Research. DOI: 10.1101/gr.129684.111 + +.. _VarScan: http://varscan.sourceforge.net/ +.. _online: http://varscan.sourceforge.net/using-varscan.html + +**Input** + +:: + + mpileup file - The SAMtools mpileup file + + +**Output** + +VarScan produces a VCF 4.1 dataset as output.
+ +**Parameters** + +:: + + analysis type + single nucleotide variation Identify SNPs from an mpileup file + insertions and deletions Identify indels from an mpileup file + consensus genotype Call consensus and variants from an mpileup file + + min-coverage + Minimum read depth at a position to make a call [8] + + min-reads2 + Minimum supporting reads at a position to call variants [2] + + min-avg-qual + Minimum base quality at a position to count a read [15] + + min-var-freq + Minimum variant allele frequency threshold [0.01] + + min-freq-for-hom + Minimum frequency to call homozygote [0.75] + + p-value + Default p-value threshold for calling variants [0.99] + + strand-filter + Ignore variants with >90% support on one strand [VarScan default: 1; this tool passes 0 unless 'yes' is selected] + + output-vcf + If set to 1, outputs in VCF format + + vcf-sample-list + For VCF output, a list of sample names in order, one per line + + variants + Report only variant (SNP/indel) positions [0] + + </help> +</tool>