Mercurial > repos > mini > strelka
changeset 0:7a9f20ca4ad5
Uploaded
author | mini |
---|---|
date | Thu, 25 Sep 2014 11:59:08 -0400 |
parents | |
children | f48854499d41 |
files | strelka2/.configureStrelkaWorkflow.pl.swp strelka2/.strelka.xml.swp strelka2/.strelka_wrapper.bash.swo strelka2/.strelka_wrapper.py.swp strelka2/configureStrelkaWorkflow.pl strelka2/lib/.Utils.pm.swp strelka2/lib/Utils.pm strelka2/libexec/callSomaticVariants.pl strelka2/libexec/consolidateResults.pl strelka2/libexec/countFastaBases strelka2/libexec/filterSomaticVariants.pl strelka2/libexec/strelka2 strelka2/strelka.xml strelka2/strelka2.xml strelka2/strelka_config.sample strelka2/strelka_wrapper.py strelka2/tool_dependencies.xml |
diffstat | 17 files changed, 2198 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/strelka2/configureStrelkaWorkflow.pl Thu Sep 25 11:59:08 2014 -0400 @@ -0,0 +1,564 @@ +#!/usr/bin/env perl + +=head1 LICENSE + +Strelka Workflow Software +Copyright (c) 2009-2013 Illumina, Inc. + +This software is provided under the terms and conditions of the +Illumina Open Source Software License 1. + +You should have received a copy of the Illumina Open Source +Software License 1 along with this program. If not, see +<https://github.com/downloads/sequencing/licenses/>. + +=head1 SYNOPSIS + +configureStrelkaWorkflow.pl --tumor FILE --normal FILE --ref FILE --config FILE [options] + +This script configures the strelka workflow for somatic variant +calling on matched tumor-normal BAM files. The configuration process +will produce an analysis makefile and directory structure. The +makefile can be used to run the analysis on a workstation or compute +cluster via make/qmake or other makefile compatible process. + +=head1 ARGUMENTS + +=over 4 + +=item --tumor FILE + +Path to tumor sample BAM file (required) + +=item --normal FILE + +Path to normal sample BAM file (required) + +=item --ref FILE + +Path to reference genome fasta (required) + +=item --config FILE + +Strelka configuration file. Default config files can be found in +${STRELKA_INSTALL_ROOT}/etc/ for both ELAND and BWA alignments. (required) + +=back + +=head1 OPTIONS + +=over 4 + +=item --output-dir DIRECTORY + +Root of the analysis directory. This script will place all +configuration files in the analysis directory, and after configuration +all results and intermediate files will be written within the analysis +directory during a run. This directory must not already +exist. (default: ./strelkaAnalysis) + +=back + +=cut + +use warnings FATAL => 'all'; +use strict; + +use Carp; +$SIG{__DIE__} = \&Carp::confess; + +use Cwd qw(getcwd); +use File::Spec; +use Getopt::Long; +use Pod::Usage; + +my $baseDir; +my $libDir; +BEGIN { + my $thisDir=(File::Spec->splitpath($0))[1]; + $baseDir=$thisDir;#$baseDir=File::Spec->catdir($thisDir,File::Spec->updir()); + $libDir=File::Spec->catdir($baseDir,'lib'); +} +use lib $libDir; +use Utils; + +if(getAbsPath($baseDir)) { + errorX("Can't resolve path for strelka_workflow install directory: '$baseDir'"); +} +my $libexecDir=File::Spec->catdir($baseDir,'libexec'); +#my $optDir=File::Spec->catdir($baseDir,'opt'); + + +my $scriptName=(File::Spec->splitpath($0))[2]; +my $argCount=scalar(@ARGV); +my $cmdline = join(' ',$0,@ARGV); + + +sub usage() { pod2usage(-verbose => 1, + -exitval => 2); } + +# +# user configuration: +# + +my ($tumorBam, $normalBam, $refFile, $configFile, $outDir); +my $help; + +GetOptions( "tumor=s" => \$tumorBam, + "normal=s" => \$normalBam, + "ref=s" => \$refFile, + "config=s" => \$configFile, + "output-dir=s" => \$outDir, + "help|h" => \$help) || usage(); + +usage() if($help); +usage() unless($argCount); + + +# +# Validate input conditions: +# + +sub checkFileArg($$) { + my ($file,$label) = @_; + + errorX("Must specify $label file") unless(defined($file)); #raise an error if file not defined + checkFile($file,$label); +} + +checkFileArg($tumorBam,"tumor BAM"); +checkFileArg($normalBam,"normal BAM"); +checkFileArg($refFile,"reference fasta"); +checkFileArg($configFile,"configuration ini"); + +sub makeAbsoluteFilePaths(\$) { + my ($filePathRef) = @_; + + my ($v,$fileDir,$fileName) = File::Spec->splitpath($$filePathRef); + if(getAbsPath($fileDir)) { + errorX("Can't resolve directory path for '$fileDir' from input file argument: '$$filePathRef'"); + } + $$filePathRef = File::Spec->catfile($fileDir,$fileName); +} + +makeAbsoluteFilePaths($tumorBam); +makeAbsoluteFilePaths($normalBam); +makeAbsoluteFilePaths($refFile); +makeAbsoluteFilePaths($configFile); + +# also check for BAM index files: +sub checkBamIndex($) { + my ($file) = @_; + my $ifile = $file . ".bai"; + if(! -f $ifile) { + errorX("Can't find index for BAM file '$file'"); + } +} + +checkBamIndex($tumorBam); +checkBamIndex($normalBam); + + +sub checkFaIndex($) { + my ($file) = @_; + my $ifile = $file . ".fai"; + if(! -f $ifile) { + errorX("Can't find index for fasta file '$file'"); + } + # check that fai file isn't improperly formatted (a la the GATK bundle NCBI 37 fai files) + open(my $FH,"< $ifile") || errorX("Can't open fai file '$ifile'"); + my $lineno=1; + while(<$FH>) { + chomp; + my @F=split(); + if(scalar(@F) != 5) { + errorX("Unexpected format for line number '$lineno' of fasta index file: '$ifile'\n\tRe-running fasta indexing may fix the issue. To do so, run: \"samtools faidx $file\""); + } + $lineno++; + } + close($FH); +} + +checkFaIndex($refFile); + + +if(defined($outDir)) { + if(getAbsPath($outDir)) { + errorX("Can't resolve path for ouput directory: '$outDir'"); + } +} else { + $outDir=File::Spec->catdir(Cwd::getcwd(),'strelkaAnalysis'); +} + +if(-e $outDir) { + errorX("Output path already exists: '$outDir'"); +} + +if(getAbsPath($baseDir)) { + errorX("Can't resolve path for strelka install directory: '$baseDir'"); +} + + #my $samtoolsDir = File::Spec->catdir($optDir,'samtools'); +checkDir($libexecDir,"strelka libexec"); + #checkDir($samtoolsDir,"samtools"); + +my $callScriptName = "callSomaticVariants.pl"; +my $filterScriptName = "filterSomaticVariants.pl"; +my $finishScriptName = "consolidateResults.pl"; +my $callScript = File::Spec->catfile($libexecDir,$callScriptName); +my $filterScript = File::Spec->catfile($libexecDir,$filterScriptName); +my $finishScript = File::Spec->catfile($libexecDir,$finishScriptName); +my $countFasta = File::Spec->catfile($libexecDir,"countFastaBases"); + #my $samtoolsBin = File::Spec->catfile($samtoolsDir,"samtools"); +checkFile($callScript,"somatic variant call script"); +checkFile($filterScript,"somatic variant filter script"); +checkFile($finishScript,"result consolidation script"); +checkFile($countFasta,"fasta scanner"); +#checkFile($samtoolsBin,"samtools"); + +# +# Configure bin runs: +# +checkMakeDir($outDir); + +# +# Configure bin runs: open and validate config ini +# +my $config = parseConfigIni($configFile); + +sub checkConfigKeys($) { + my ($keyref) = @_; + for (@$keyref) { + errorX("Undefined configuration option: '$_'") unless(defined($config->{user}{$_})); + } +} + +# these are the keys we need at configuration time: +my @config_keys = qw(binSize); + +# these are additional keys we will need to run the workflow: +# (note we don't check for (maxInputDepth,minTier2Mapq) for back compatibility with older config files) +my @workflow_keys = qw( +minTier1Mapq isWriteRealignedBam ssnvPrior sindelPrior ssnvNoise sindelNoise ssnvNoiseStrandBiasFrac +ssnvQuality_LowerBound sindelQuality_LowerBound isSkipDepthFilters depthFilterMultiple +snvMaxFilteredBasecallFrac snvMaxSpanningDeletionFrac indelMaxRefRepeat +indelMaxWindowFilteredBasecallFrac indelMaxIntHpolLength); + +checkConfigKeys(\@config_keys); +checkConfigKeys(\@workflow_keys); + + +my $binSize = int($config->{user}{binSize}); + +$config->{derived}{configurationCmdline} = $cmdline; +$config->{derived}{normalBam} = $normalBam; +$config->{derived}{tumorBam} = $tumorBam; +$config->{derived}{refFile} = $refFile; +$config->{derived}{outDir} = $outDir; + +# +# Configure bin runs: check for consistent chrom info between BAMs and reference +# +sub getBamChromInfo($) { + my $file = shift; + my $cmd = "samtools view -H $file |"; + open(my $FH,$cmd) || errorX("Can't open process $cmd"); + + my %info; + my $n=0; + while(<$FH>) { + next unless(/^\@SQ/); + chomp; + my @F = split(/\t/); + scalar(@F) >= 3 || errorX("Unexpected bam header for file '$file'"); + + my %h = (); + foreach (@F) { + my @vals = split(':'); + $h{$vals[0]} = $vals[1]; + } + $F[1] = $h{'SN'}; + $F[2] = $h{'LN'}; + + my $size = int($F[2]); + ($size > 0) || errorX("Unexpected chromosome size '$size' in bam header for file '$file'"); + $info{$F[1]}{size} = $size; + $info{$F[1]}{order} = $n; + $n++; + } + close($FH) || errorX("Can't close process $cmd"); + return %info; +} + + +my %chromInfo = getBamChromInfo($normalBam); +my @chroms = sort { $chromInfo{$a}{order} <=> $chromInfo{$b}{order} } (keys(%chromInfo)); + +{ + #consistency check: + my %tumorChromInfo = getBamChromInfo($tumorBam); + for my $chrom (@chroms) { + my $ln = $chromInfo{$chrom}{size}; + my $tln = $tumorChromInfo{$chrom}{size}; + my $order = $chromInfo{$chrom}{order}; + my $torder = $tumorChromInfo{$chrom}{order}; + unless(defined($tln) && ($tln==$ln) && ($torder==$order)) { + errorX("Tumor and normal BAM file headers disagree on chromosome: '$chrom'"); + } + delete $tumorChromInfo{$chrom}; + } + for my $chrom (keys(%tumorChromInfo)) { + errorX("Tumor and normal BAM file headers disagree on chromosome: '$chrom'"); + } +} + + +my %refChromInfo; +logX("Scanning reference genome"); +{ + my $knownGenomeSize=0; + my $cmd="$countFasta $refFile |"; + open(my $FFH,$cmd) || errorX("Failed to open process '$cmd'"); + + while(<$FFH>) { + chomp; + my @F = split(/\t/); + scalar(@F) == 4 || errorX("Unexpected value from '$cmd'"); + $knownGenomeSize += int($F[2]); + $refChromInfo{$F[1]}{knownSize} = int($F[2]); + $refChromInfo{$F[1]}{size} = int($F[3]); + } + close($FFH) || errorX("Failed to close process '$cmd'"); + + #consistency check: + for my $chrom (@chroms) { + my $ln = $chromInfo{$chrom}{size}; + my $rln = $refChromInfo{$chrom}{size}; + unless(defined($rln) && ($rln==$ln)) { + errorX("BAM headers and reference fasta disagree on chromosome: '$chrom'"); + } + $config->{derived}{"chrom_${chrom}_size"} = $rln; + $config->{derived}{"chrom_${chrom}_knownSize"} = $refChromInfo{$chrom}{knownSize}; + } + $config->{derived}{chromOrder} = join("\t",@chroms); + + $config->{derived}{knownGenomeSize} = $knownGenomeSize; +} +logX("Scanning reference genome complete"); + + + +# +# Configure bin runs: create directory structure +# +my $resultsDir = File::Spec->catdir($outDir,'results'); +checkMakeDir($resultsDir); +if($config->{user}{isWriteRealignedBam}) { + my $bamDir = File::Spec->catdir($outDir,'realigned'); + checkMakeDir($bamDir); +} +my $chromRootDir = File::Spec->catdir($outDir,'chromosomes'); +checkMakeDir($chromRootDir); +for my $chrom (@chroms) { + my $chromDir = File::Spec->catdir($chromRootDir,$chrom); + checkMakeDir($chromDir); + + my $chromRef = $chromInfo{$chrom}; + $chromRef->{dir} = $chromDir; + $chromRef->{binList} = getBinList($chromRef->{size},$binSize); + + my $binRootDir = File::Spec->catdir($chromDir,'bins'); + checkMakeDir($binRootDir); + + for my $binId ( @{$chromRef->{binList}} ) { + my $binDir = File::Spec->catdir($binRootDir,$binId); + checkMakeDir($binDir); + } +} + + + +# +# write run config file: +# +my $runConfigFile; +{ + my $cstr = <<END; +; +; Strelka workflow configuration file +; +; This is an automatically generated file, you probably don't want to edit it. If starting a new run, +; input configuration templates (with comments) can be found in the Strelka etc/ directory. +; +END + + $cstr .= writeConfigIni($config); + + my $configDir = File::Spec->catdir($outDir,'config'); + checkMakeDir($configDir); + $runConfigFile = File::Spec->catdir($configDir,'run.config.ini'); + open(my $FH,"> $runConfigFile") || errorX("Can't open file '$runConfigFile'"); + print $FH $cstr; + close($FH); +} + + + +# +# create makefile +# +my $makeFile = File::Spec->catfile($outDir,"Makefile"); +open(my $MAKEFH, "> $makeFile") || errorX("Can't open file: '$makeFile'"); + +my $completeFile = "task.complete"; + +print $MAKEFH <<ENDE; +# This makefile was automatically generated by $scriptName +# +# Please do not edit. + +script_dir := $libexecDir +call_script := \$(script_dir)/$callScriptName +filter_script := \$(script_dir)/$filterScriptName +finish_script := \$(script_dir)/$finishScriptName + +config_file := $runConfigFile + +analysis_dir := $outDir +results_dir := \$(analysis_dir)/results + +ENDE + +print $MAKEFH <<'ENDE'; + +complete_tag := task.complete + +finish_task := $(analysis_dir)/$(complete_tag) + +get_chrom_dir = $(analysis_dir)/chromosomes/$1 +get_chrom_task = $(call get_chrom_dir,$1)/$(complete_tag) +get_bin_task = $(call get_chrom_dir,$1)/bins/$2/$(complete_tag) + + + +all: $(finish_task) + @$(print_success) + + +define print_success +echo;\ +echo Analysis complete. Final somatic calls can be found in $(results_dir);\ +echo +endef + + +# top level results target: +# +$(finish_task): + $(finish_script) --config=$(config_file) && touch $@ + + +# chromosome targets: +# +ENDE + +for my $chrom (@chroms) { + + print $MAKEFH <<ENDE; +chrom_${chrom}_task := \$(call get_chrom_task,$chrom) +\$(finish_task): \$(chrom_${chrom}_task) +\$(chrom_${chrom}_task): + \$(filter_script) --config=\$(config_file) --chrom=$chrom && touch \$@ + +ENDE + +} + +print $MAKEFH <<ENDE; + +# chromosome bin targets: +# +ENDE + +for my $chrom (@chroms) { + for my $bin (@{$chromInfo{$chrom}{binList}}) { + +print $MAKEFH <<ENDE; +chrom_${chrom}_bin_${bin}_task := \$(call get_bin_task,$chrom,$bin) +\$(chrom_${chrom}_task): \$(chrom_${chrom}_bin_${bin}_task) +\$(chrom_${chrom}_bin_${bin}_task): + \$(call_script) --config=\$(config_file) --chrom=$chrom --bin=$bin && touch \$@ + +ENDE + + } +} + + +# If the eval function is available, this is the way we could finish +# the makefile without being so verbose but it doesn't look like qmake +# understands this function. + +=cut + +print $MAKEFH <<ENDE; + +chroms := @chroms + +ENDE + +for my $chrom (@chroms) { + print $MAKEFH "${chrom}_bins := " . join(" ",@{$chromInfo{$chrom}{binList}}) . "\n"; +} + +print $MAKEFH <<'ENDE'; + +define chrom_task_template +chrom_$1_task := $(call get_chrom_task,$1) +$(finish_task): $$(chrom_$1_task) +$$(chrom_$1_task): + $$(filter_script) --config=$$(config_file) --chrom=$1 && touch $$@ +endef + +$(foreach c,$(chroms),$(eval $(call chrom_task_template,$c))) + + +# chromosome bin targets: +# +define chrom_bin_task_template +chrom_$1_bin_$2_task := $(call get_bin_task,$1,$2) +$$(chrom_$1_task): $$(chrom_$1_bin_$2_task) +$$(chrom_$1_bin_$2_task): + $$(call_script) --config=$$(config_file) --chrom=$1 --bin=$2 && touch $$@ +endef + +$(foreach c,$(chroms), \ + $(foreach b,$($c_bins),$(eval $(call chrom_bin_task_template,$c,$b))) \ + ) + +ENDE + +=cut + + + +print <<END; + + +Successfully configured analysis and created makefile '$makeFile'. + +To run the analysis locally using make, run: + +make -C $outDir + +...or: + +cd $outDir +make + +END + +1; + +__END__ +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/strelka2/lib/Utils.pm Thu Sep 25 11:59:08 2014 -0400 @@ -0,0 +1,172 @@ + +=head1 LICENSE + +Strelka Workflow Software +Copyright (c) 2009-2013 Illumina, Inc. + +This software is provided under the terms and conditions of the +Illumina Open Source Software License 1. + +You should have received a copy of the Illumina Open Source +Software License 1 along with this program. If not, see +<https://github.com/downloads/sequencing/licenses/>. + +=cut + + +package Utils; + +use base 'Exporter'; + +our @EXPORT = qw( + errorX logX executeCmd checkFile checkDir checkMove + getAbsPath checkMakeDir getBinList + parseConfigIni writeConfigIni + ); + +use warnings FATAL => 'all'; +use strict; + +use Carp; +use Cwd qw(realpath); +use File::Copy qw(move); +use File::Path qw(mkpath); + + +sub errorX($) { + confess "\nERROR: " . $_[0] . "\n\n"; +} + +sub logX($) { + print STDERR "INFO: " . $_[0] . "\n"; +} + + +sub executeCmd($;$) { + my $cmd = shift; + my $isVerbose = shift; + + logX("Running: '$cmd'") if(defined($isVerbose) and $isVerbose); + system($cmd) == 0 + or errorX("Failed system call: '$cmd'"); +} +#return an error if file does not exist +sub checkFile($;$) { + my $file = shift; + return if(-f $file); + my $label = shift; + errorX("Can't find" . (defined($label) ? " $label" : "") . " file: '$file'"); +} +#return an error if file does not Exist +sub checkDir($;$) { + my $dir = shift; + return if(-d $dir); + my $label = shift; + errorX("Can't find" . (defined($label) ? " $label" : "") . " directory: '$dir'"); +} + +sub checkMove($$) { + my ($old,$new) = @_; + move($old,$new) || errorX("File move failed: $!\n\tAttempting to move '$old' to '$new'"); +} + + + + +=item getAbsPath($path) + +This procedure attempts to convert a path provided by the user on the +command line into an absolute path. It should be able to handle "~" +paths and conventional relative paths using ".." or ".". Resolution of +links should follow the convention of "Cwd::realpath". + +B<Parameters:> + + $dirRef - path (converted to absolute path in place) + +B<Returns:> + + returns zero if successful, non-zero otherwise. + +=cut +sub getAbsPath(\$) { + my ($dirRef) = @_; + my @tmp=glob($$dirRef); + return 1 if(scalar(@tmp) != 1); + my $ret = Cwd::realpath($tmp[0]); + return 1 if !$ret && !($ret = File::Spec->rel2abs($tmp[0])); + $$dirRef = $ret; + return 0; +} + + +#verify path is not a file, then create a directory with this name if does not exist +sub checkMakeDir($) { + my $dir = shift; + unless (-e $dir) { + File::Path::mkpath($dir) || errorX("Can't create directory '$dir'"); + } else { + errorX("Path is not a directory '$dir'\n") unless(-d $dir); + } +} + + + +sub getBinList($$) { + my ($chromSize,$binSize) = @_; + + my $nm1 = (($chromSize-1) / $binSize); + return [ map {sprintf("%04i",$_)} (0..$nm1) ]; +} + + + +sub parseConfigError($$) { + my ($file,$line) = @_; + errorX("Config file '$file' contains unexpected line '$line'\n"); +} + +#lis le fichier de config, si la ligne est de type : some space + [ some character ] + some space then register ther character in $section. ( [user] , then $section=user). +sub parseConfigIni($) { + my $file = shift; + my %config; + open(my $FH,"< $file") || errorX("Can't open config file '$file'"); + my $section = "noSection"; + while(<$FH>) { + next if(/^[;#]/); + next if(/^\s*$/); + chomp; + my $line=$_; + my @ncl = split(/[;#]/); + next unless(scalar(@ncl)); + my $nc = $ncl[0]; + if($nc =~ /^\s*\[([^\]]*)\]\s*$/) { + $section = $1; + next; + } + my ($key,$val) = map { s/^\s+//; s/\s+$//; $_ } split(/=/,$nc,2); + unless(defined($key) && defined($val) && ($key ne "")) { parseConfigError($file,$line); } + + $config{$section}{$key} = $val; + } + close($FH); + return \%config; +} + + +# minimal ini stringifier: +# +sub writeConfigIni($) { + my $config = shift; + my $val = ""; + for my $section (sort(keys(%$config))) { + $val .= "\n[$section]\n"; + for my $key (sort(keys(%{$config->{$section}}))) { + $val .= "$key = " . $config->{$section}{$key} . "\n"; + } + } + return $val; +} + + +1;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/strelka2/libexec/callSomaticVariants.pl Thu Sep 25 11:59:08 2014 -0400 @@ -0,0 +1,241 @@ +#!/usr/bin/env perl +print "\n"; +print @INC; +print "\n"; +=head1 LICENSE + +Strelka Workflow Software +Copyright (c) 2009-2013 Illumina, Inc. + +This software is provided under the terms and conditions of the +Illumina Open Source Software License 1. + +You should have received a copy of the Illumina Open Source +Software License 1 along with this program. If not, see +<https://github.com/downloads/sequencing/licenses/>. + +=head1 SYNOPSIS + +callSomaticVariants.pl [options] | --help + +=head2 SUMMARY + +Run the somatic variant caller for snvs and indels on a single +chromosome bin. + +=cut + +use warnings FATAL => 'all'; +use strict; + +use Carp; +$SIG{__DIE__} = \&Carp::confess; + +use File::Spec; +use Getopt::Long; +use Pod::Usage; + +my $baseDir; +my $libDir; +BEGIN { +print "\n"; +print @INC; +print "\n"; + + my $thisDir=(File::Spec->splitpath($0))[1]; + $baseDir=File::Spec->catdir($thisDir,File::Spec->updir()); + $libDir=File::Spec->catdir($baseDir,'lib'); +} +use lib $libDir; +use Utils; + +if(getAbsPath($baseDir)) { + errorX("Can't resolve path for strelka_workflow install directory: '$baseDir'"); +} +my $libexecDir=File::Spec->catdir($baseDir,'libexec'); +#my $optDir=File::Spec->catdir($baseDir,'opt'); + + +my $scriptName=(File::Spec->splitpath($0))[2]; +my $argCount=scalar(@ARGV); +my $cmdline = join(' ',$0,@ARGV); + + +my ($chrom, $binId, $configFile); +my $help; + +GetOptions( + "chrom=s" => \$chrom, + "bin=s" => \$binId, + "config=s" => \$configFile, + "help|h" => \$help) or pod2usage(2); + +pod2usage(2) if($help); +pod2usage(2) unless(defined($chrom)); +pod2usage(2) unless(defined($binId)); +pod2usage(2) unless(defined($configFile)); + + + +# +# check all fixed paths (not based on commandline arguments): +# +checkDir($baseDir); +checkDir($libexecDir); +#checkDir($optDir); + +my $strelkaBin=File::Spec->catdir($libexecDir,'strelka2'); +checkFile($strelkaBin,"strelka binary"); +#my $samtoolsBin = File::Spec->catfile($optDir,'samtools','samtools'); +#checkFile($samtoolsBin,"samtools binary"); + + + +# +# read config and validate values +# +checkFile($configFile,"configuration ini"); +my $config = parseConfigIni($configFile); + +for (qw(knownGenomeSize tumorBam normalBam refFile outDir)) { + errorX("Undefined configuration option: '$_'") unless(defined($config->{derived}{$_})); +} + +# we skip maxInputDepth,minTier2Mapq here to allow older config files +for (qw(minTier1Mapq isWriteRealignedBam binSize ssnvPrior sindelPrior + ssnvNoise sindelNoise ssnvNoiseStrandBiasFrac)) { + errorX("Undefined configuration option: '$_'") unless(defined($config->{user}{$_})); +} + +my $outDir = $config->{derived}{outDir}; +my $binDir = File::Spec->catdir($outDir,'chromosomes',$chrom,'bins',$binId); +checkDir($outDir,"output"); +checkDir($binDir,"output bin"); + + +my $tumorBam = $config->{derived}{tumorBam}; +my $normalBam = $config->{derived}{normalBam}; +my $refFile = $config->{derived}{refFile}; +checkFile($tumorBam,"tumor BAM"); +checkFile($normalBam,"normal BAM"); +checkFile($refFile,"reference"); + + +# pull out some config options for convenience: +my $binSize=$config->{user}{binSize}; +my $isWriteRealignedBam=$config->{user}{isWriteRealignedBam}; +my $knownGenomeSize = $config->{derived}{knownGenomeSize}; + + +my $begin = (int($binId)*$binSize)+1; +my $end = ((int($binId)+1)*$binSize); +#my $end = $begin+100000; #debug mode + +my $useroptions = $config->{user}; + +# set previous default value if an older config file is being used: +if(! defined($useroptions->{minTier2Mapq})) { + $useroptions->{minTier2Mapq} = 5; +} + + +# +# setup the strelka command-line: +# +my $strelka_base_opts= "-clobber" . +" -filter-unanchored" . +" -min-paired-align-score " . $useroptions->{minTier1Mapq} . +" -min-single-align-score 10" . +" -min-qscore 0" . +" -report-range-begin $begin -report-range-end $end" . +" -samtools-reference '$refFile'" . +" -max-window-mismatch 3 20 -print-used-allele-counts" . +" -bam-seq-name '" . $chrom . "'" . +" -genome-size $knownGenomeSize" . +" -max-indel-size 50" . +" -indel-nonsite-match-prob 0.5" . +" --min-contig-open-end-support 35" . +" --somatic-snv-rate " . $useroptions->{ssnvPrior} . +" --shared-site-error-rate " . $useroptions->{ssnvNoise} . +" --shared-site-error-strand-bias-fraction " . $useroptions->{ssnvNoiseStrandBiasFrac} . +" --somatic-indel-rate " . $useroptions->{sindelPrior} . +" --shared-indel-error-rate " . $useroptions->{sindelNoise} . +" --tier2-min-single-align-score " . $useroptions->{minTier2Mapq} . +" --tier2-min-paired-align-score " . $useroptions->{minTier2Mapq} . +" --tier2-single-align-score-rescue-mode" . +" --tier2-mismatch-density-filter-count 10" . +" --tier2-no-filter-unanchored" . +" --tier2-indel-nonsite-match-prob 0.25" . +" --tier2-include-singleton" . +" --tier2-include-anomalous"; + + +my $somSnvFile='somatic.snvs.unfiltered.vcf'; +my $somIndelFile='somatic.indels.unfiltered.vcf'; + +my $cmd = "$strelkaBin $strelka_base_opts" . +" -bam-file " . $normalBam . +" --tumor-bam-file " . $tumorBam . +" --somatic-snv-file " . File::Spec->catfile($binDir,$somSnvFile) . +" --somatic-indel-file " . File::Spec->catfile($binDir,$somIndelFile) . +" --variant-window-flank-file 50 " . File::Spec->catfile($binDir,$somIndelFile . '.window'); + + +sub ualignFile($) { + return File::Spec->catfile($binDir,$_[0] . ".unsorted.realigned.bam"); +} +sub alignFile($) { + return File::Spec->catfile($binDir,$_[0] . ".realigned"); +} + + +if(exists($useroptions->{maxInputDepth}) && ($useroptions->{maxInputDepth} > 0)) { + $cmd .= " --max-input-depth " . $useroptions->{maxInputDepth}; +} + + +if($isWriteRealignedBam) { + $cmd .= " -realigned-read-file " . ualignFile("normal") . + " --tumor-realigned-read-file " . ualignFile("tumor"); +} + +if(defined($useroptions->{extraStrelkaArguments})){ + my $arg=$useroptions->{extraStrelkaArguments}; + if($arg !~ /^\s*$/) { + $cmd .= " " . $arg; + } +} + +# this file contains site stats that used to be printed on stdout: +# +$cmd .= +" --report-file " . File::Spec->catfile($binDir,'strelka.stats'); + +$cmd .= +" >| " . File::Spec->catfile($binDir,'strelka.stdout') . +" 2>| " . File::Spec->catfile($binDir,'strelka.stderr'); + + +executeCmd($cmd,0); + + +if($isWriteRealignedBam) { + for my $label (qw(normal tumor)) { + my $ufile = ualignFile($label); + if( -f $ufile ) { + my $afile = alignFile($label); + my $cmd = "samtools sort " . $ufile . " " . $afile; + executeCmd($cmd,0); + unlink($ufile); + } else { + logX("Can't find unsorted realigned BAM file: '$ufile'"); + } + } +} + + +1; + +__END__ + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/strelka2/libexec/consolidateResults.pl Thu Sep 25 11:59:08 2014 -0400 @@ -0,0 +1,276 @@ +#!/usr/bin/env perl + +=head1 LICENSE + +Strelka Workflow Software +Copyright (c) 2009-2013 Illumina, Inc. + +This software is provided under the terms and conditions of the +Illumina Open Source Software License 1. + +You should have received a copy of the Illumina Open Source +Software License 1 along with this program. If not, see +<https://github.com/downloads/sequencing/licenses/>. + +=head1 SYNOPSIS + +consolidateSomaticVariants.pl [options] | --help + +=head2 SUMMARY + +Aggregate final results from all chromosomes + +=cut + +use warnings FATAL => 'all'; +use strict; + +use Carp; +$SIG{__DIE__} = \&Carp::confess; + +use File::Spec; +use File::Temp; +use Getopt::Long; +use Pod::Usage; + +my $baseDir; +my $libDir; +#my $optDir; +#my $vcftDir; +BEGIN { + my $thisDir=(File::Spec->splitpath($0))[1]; + $baseDir=File::Spec->catdir($thisDir,File::Spec->updir()); + $libDir=File::Spec->catdir($baseDir,'lib'); + #$optDir=File::Spec->catdir($baseDir,'opt'); + #$vcftDir=File::Spec->catdir($optDir,'vcftools','lib','perl5','site_perl'); +} +use lib $libDir; +use Utils; +#use lib $vcftDir; +use Vcf; + +if(getAbsPath($baseDir)) { + errorX("Can't resolve path for strelka_workflow install directory: '$baseDir'"); +} +#$optDir=File::Spec->catdir($baseDir,'opt'); + + +my $scriptName=(File::Spec->splitpath($0))[2]; +my $argCount=scalar(@ARGV); +my $cmdline=join(' ',$0,@ARGV); + + +my $configFile; +my $help; + +GetOptions( "config=s" => \$configFile, + "help|h" => \$help) or pod2usage(2); + +pod2usage(2) if($help); +pod2usage(2) unless(defined($configFile)); + +# +# check fixed paths +# +#my $samtoolsBin = File::Spec->catfile($optDir,'samtools','samtools'); +#checkFile($samtoolsBin,"samtools binary"); + + +# +# read config and validate values +# +checkFile($configFile,"configuration ini"); +my $config = parseConfigIni($configFile); + + +for (qw(outDir chromOrder)) { + errorX("Undefined configuration option: '$_'") unless(defined($config->{derived}{$_})); +} +for (qw(isWriteRealignedBam binSize)) { + errorX("Undefined configuration option: '$_'") unless(defined($config->{user}{$_})); +} + +my $userconfig = $config->{user}; + +my @chromOrder = split(/\t/,$config->{derived}{chromOrder}); +for my $chrom (@chromOrder) { + my $chromSizeKey = "chrom_" . $chrom . "_size"; + errorX("Undefined configuration option: '$_'") unless(defined($chromSizeKey)); +} + +my $outDir = $config->{derived}{outDir}; +checkDir($outDir,"output"); + + +my $isWriteRealignedBam = $userconfig->{isWriteRealignedBam}; + +for my $chrom (@chromOrder) { + my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom); + checkDir($chromDir,"input chromosome"); + + next unless($isWriteRealignedBam); + my $chromSizeKey = "chrom_" . $chrom . "_size"; + my $binList = getBinList($config->{derived}{$chromSizeKey},$userconfig->{binSize}); + for my $binId (@$binList) { + my $dir = File::Spec->catdir($chromDir,'bins',$binId); + checkDir($dir,"input bin"); + } +} + + + +# suffix used for large result file intermediates: +my $itag = ".incomplete"; + + +# +# concatenate vcfs: +# +sub concatenateVcfs($) { + my $fileName = shift; + + my $is_first = 1; + + my $allFileName = "all." . $fileName; + my $allFile = File::Spec->catfile($outDir,'results',$allFileName . $itag); + open(my $aFH,'>',"$allFile") + || errorX("Failed to open file: '$allFile'"); + + # loop over all chroms once to create the header, and one more time for all the data: + my $headervcf; + for my $chrom (@chromOrder) { + my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom); + my $iFile = File::Spec->catfile($chromDir,$fileName); + checkFile($iFile); + + my $depthKey="maxDepth_${chrom}"; + + if($is_first) { + open(my $iFH,'<',"$iFile") + || errorX("Failed to open file: '$iFile'"); + $headervcf = Vcf->new(fh=>$iFH); + $headervcf->parse_header(); + $headervcf->remove_header_line(key=>"cmdline"); + $headervcf->add_header_line({key=>"cmdline",value=>$cmdline}); + $headervcf->remove_header_line(key=>"$depthKey"); + close($iFH); + $is_first=0; + } + + { + open(my $iFH,'<',"$iFile") + || errorX("Failed to open file: '$iFile'"); + my $vcf = Vcf->new(fh=>$iFH); + $vcf->parse_header(); + for my $line (@{$vcf->get_header_line(key=>"$depthKey")}) { + # $line seems to be returned as a length 1 array ref to a hash -- ??!?!??!! + $headervcf->add_header_line($line->[0]); + } + $vcf->close(); + close($iFH); + } + } + print $aFH $headervcf->format_header(); + $headervcf->close(); + + for my $chrom (@chromOrder) { + my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom); + my $iFile = File::Spec->catfile($chromDir,$fileName); + + open(my $iFH,'<',"$iFile") + || errorX("Failed to open file: '$iFile'"); + + my $vcf = Vcf->new(fh=>$iFH); + $vcf->parse_header(); + print $aFH $_ while(<$iFH>); + } + + close($aFH); + + # make a second set of files with only the passed variants: + my $passedFileName = "passed." . $fileName; + my $passedFile = File::Spec->catfile($outDir,'results',$passedFileName . $itag); + open(my $pFH,'>',"$passedFile") + || errorX("Failed to open file: '$passedFile'"); + + open(my $arFH,'<',"$allFile") + || errorX("Failed to open file: '$allFile'"); + + while(<$arFH>) { + chomp; + unless(/^#/) { + my @F = split(/\t/); + next if((scalar(@F)>=7) && ($F[6] ne "PASS")); + } + print $pFH "$_\n"; + } + + close($arFH); + close($pFH); + + my $allFileFinished = File::Spec->catfile($outDir,'results',$allFileName); + checkMove($allFile,$allFileFinished); + + my $passedFileFinished = File::Spec->catfile($outDir,'results',$passedFileName); + checkMove($passedFile,$passedFileFinished); +} + +concatenateVcfs("somatic.snvs.vcf"); +concatenateVcfs("somatic.indels.vcf"); + + +my $bamSuffix = ".realigned.bam"; + +sub consolidateBam($) { + my $label = shift; + + my $fileName = $label . $bamSuffix; + + my $reDir = File::Spec->catdir($outDir,'realigned'); + checkMakeDir($reDir); + + my @bamList; + for my $chrom (@chromOrder) { + my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom); + + my $chromSizeKey = "chrom_" . $chrom . "_size"; + my $binList = getBinList($config->{derived}{$chromSizeKey},$userconfig->{binSize}); + for my $binId (@$binList) { + my $binDir = File::Spec->catdir($chromDir,'bins',$binId); + my $rbamFile = File::Spec->catfile($binDir,$fileName); + checkFile($rbamFile,"bin realigned bam file"); + + push @bamList,$rbamFile; + } + } + + return unless(scalar(@bamList)); + + my $headerFH = File::Temp->new(); + my $getHeaderCmd = "bash -c '$samtoolsBin view -H ".$bamList[0]." > $headerFH'"; + executeCmd($getHeaderCmd); + + my $allFile = File::Spec->catfile($reDir,$fileName . $itag); + my $cmd="$samtoolsBin merge -h $headerFH $allFile ". join(" ",@bamList); + executeCmd($cmd); + + my $allFileFinished = File::Spec->catfile($reDir,$fileName); + checkMove($allFile,$allFileFinished); + + my $indexCmd="$samtoolsBin index $allFileFinished"; + executeCmd($indexCmd); + + # for now don't remove all the bin realignments... + # unlink(@bamList); +} + +if($isWriteRealignedBam) { + consolidateBam("normal"); + consolidateBam("tumor"); +} + + +1; + +__END__ +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/strelka2/libexec/filterSomaticVariants.pl Thu Sep 25 11:59:08 2014 -0400 @@ -0,0 +1,435 @@ +#!/usr/bin/env perl + +=head1 LICENSE + +Strelka Workflow Software +Copyright (c) 2009-2013 Illumina, Inc. + +This software is provided under the terms and conditions of the +Illumina Open Source Software License 1. + +You should have received a copy of the Illumina Open Source +Software License 1 along with this program. If not, see +<https://github.com/downloads/sequencing/licenses/>. + +=head1 SYNOPSIS + +filterSomaticVariants.pl [options] | --help + +=head2 SUMMARY + +Aggregate and filter the variant caller results by chromosome + +=cut + +use warnings FATAL => 'all'; +use strict; + +use Carp; +$SIG{__DIE__} = \&Carp::confess; + +use File::Spec; +use Getopt::Long; +use Pod::Usage; + +my $baseDir; +my $libDir; +#my $vcftDir; +BEGIN { + my $thisDir=(File::Spec->splitpath($0))[1]; + $baseDir=File::Spec->catdir($thisDir,File::Spec->updir()); + $libDir=File::Spec->catdir($baseDir,'lib'); + #my $optDir=File::Spec->catdir($baseDir,'opt'); + #$vcftDir=File::Spec->catdir($optDir,'vcftools','lib','perl5','site_perl'); +} +use lib $libDir; +use Utils; +#use lib $vcftDir; +#use Vcf; + + +my $scriptName=(File::Spec->splitpath($0))[2]; +my $argCount=scalar(@ARGV); +my $cmdline = join(' ',$0,@ARGV); + + +my ($chrom, $configFile); +my $help; + +GetOptions( "chrom=s" => \$chrom, + "config=s" => \$configFile, + "help|h" => \$help) or pod2usage(2); + +pod2usage(2) if($help); +pod2usage(2) unless(defined($chrom)); +pod2usage(2) unless(defined($configFile)); + + + +# +# read config and validate values +# +checkFile($configFile,"configuration ini"); +my $config = parseConfigIni($configFile); + +my $chromSizeKey = "chrom_" . $chrom . "_size"; +my $chromKnownSizeKey = "chrom_" . $chrom . "_knownSize"; + +for (("outDir", $chromSizeKey, $chromKnownSizeKey)) { + errorX("Undefined configuration option: '$_'") unless(defined($config->{derived}{$_})); +} +for (qw(binSize ssnvQuality_LowerBound sindelQuality_LowerBound + isSkipDepthFilters depthFilterMultiple + snvMaxFilteredBasecallFrac snvMaxSpanningDeletionFrac + indelMaxRefRepeat indelMaxWindowFilteredBasecallFrac + indelMaxIntHpolLength)) { + errorX("Undefined configuration option: '$_'") unless(defined($config->{user}{$_})); +} + +my $outDir = $config->{derived}{outDir}; +my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom); +checkDir($outDir,"output"); +checkDir($chromDir,"output chromosome"); + +my $userconfig = $config->{user}; + +my $binList = getBinList($config->{derived}{$chromSizeKey},$userconfig->{binSize}); +for my $binId (@$binList) { + my $dir = File::Spec->catdir($chromDir,'bins',$binId); + checkDir($dir,"input bin"); +} + +# +# parameters from user config file: +# + +# minimum passed ssnv_nt Q-score: +my $minQSSNT=$userconfig->{ssnvQuality_LowerBound}; +# minimum passed sindel_nt Q-score: +my $minQSINT=$userconfig->{sindelQuality_LowerBound}; + +# +# filtration parameters from user config file: +# + +# skip depth filters for targeted resequencing: +my $isUseDepthFilter=(! $userconfig->{isSkipDepthFilters}); +# multiple of the normal mean coverage to filter snvs and indels +my $depthFilterMultiple=$userconfig->{depthFilterMultiple}; +# max filtered basecall fraction for any sample +my $snvMaxFilteredBasecallFrac=$userconfig->{snvMaxFilteredBasecallFrac}; +# max snv spanning-deletion fraction for any sample +my $snvMaxSpanningDeletionFrac=$userconfig->{snvMaxSpanningDeletionFrac}; +# max indel reference repeat length +my $indelMaxRefRepeat=$userconfig->{indelMaxRefRepeat}; +# max indel window filter fraction +my $indelMaxWindowFilteredBasecallFrac=$userconfig->{indelMaxWindowFilteredBasecallFrac}; +# max indel interupted hompolymer length: +my $indelMaxIntHpolLength=$userconfig->{indelMaxIntHpolLength}; + + +# first we want the normal sample mean chromosome depth: +# +my $filterCoverage; +if($isUseDepthFilter) { + my $totalCoverage = 0; + for my $binId (@$binList) { + my $sFile = File::Spec->catfile($chromDir,'bins',$binId,'strelka.stats'); + checkFile($sFile,"strelka bin stats"); + open(my $sFH, '<', $sFile) + || errorX("Can't open file: '$sFile' $!"); + + my $is_found=0; + while(<$sFH>) { + next unless(/^NORMAL_NO_REF_N_COVERAGE\s/); + my $is_bad = 0; + if(not /sample_size:\s*(\d+)\s+/) { $is_bad=1; } + my $ss=$1; + # leave the regex for mean fairly loose to pick up scientific notation, etc.. + if(not /mean:\s*(\d[^\s]*|nan)\s+/) { $is_bad=1; } + my $mean=$1; + errorX("Unexpected format in file: '$sFile'") if($is_bad); + + my $coverage = ( $ss==0 ? 0 : int($ss*$mean) ); + + $totalCoverage += $coverage; + $is_found=1; + last; + } + close($sFH); + errorX("Unexpected format in file: '$sFile'") unless($is_found); + } + + my $chromKnownSize = $config->{derived}{$chromKnownSizeKey}; + my $normalMeanCoverage = ($totalCoverage/$chromKnownSize); + $filterCoverage = $normalMeanCoverage*$depthFilterMultiple; +} + + +# add filter description to vcf header unless it already exists +# return 1 if filter id already exists, client can decide if this is an error +# +sub add_vcf_filter($$$) { + my ($vcf,$id,$desc) = @_; + return 1 if(scalar(@{$vcf->get_header_line(key=>'FILTER', ID=>$id)})); + $vcf->add_header_line({key=>'FILTER', ID=>$id,Description=>$desc}); + return 0; +} + + +sub check_vcf_for_sample($$$) { + my ($vcf,$sample,$file) = @_; + my $is_found=0; + for ($vcf->get_samples()) { + $is_found=1 if($_ eq $sample); + } + errorX("Failed to find sample '$sample' in vcf file '$file'") unless($is_found); +} + + + +my $depthFiltId="DP"; + + +# Runs all post-call vcf filters: +# +sub filterSnvFileList(\@$$$) { + my ($ifiles,$depthFilterVal,$acceptFileName,$isUseDepthFilter) = @_; + + my $baseFiltId="BCNoise"; + my $spanFiltId="SpanDel"; + my $qFiltId="QSS_ref"; + + open(my $aFH,'>',$acceptFileName) + or errorX("Failed to open file: '$acceptFileName'"); + + my $is_first=1; + for my $ifile (@$ifiles) { + open(my $iFH,'<',"$ifile") + or errorX("Failed to open file: '$ifile'"); + my $vcf = Vcf->new(fh=>$iFH); + + # run some simple header validation on each vcf: + $vcf->parse_header(); + check_vcf_for_sample($vcf,'NORMAL',$ifile); + check_vcf_for_sample($vcf,'TUMOR',$ifile); + + if($is_first) { + # TODO: update vcf meta-data for chromosome-level filtered files + # + $vcf->remove_header_line(key=>"cmdline"); + $vcf->add_header_line({key=>"cmdline",value=>$cmdline}); + if($isUseDepthFilter) { + $vcf->add_header_line({key=>"maxDepth_$chrom",value=>$depthFilterVal}); + add_vcf_filter($vcf,$depthFiltId,"Greater than ${depthFilterMultiple}x chromosomal mean depth in Normal sample"); + } + add_vcf_filter($vcf,$baseFiltId,"Fraction of basecalls filtered at this site in either sample is at or above $snvMaxFilteredBasecallFrac"); + add_vcf_filter($vcf,$spanFiltId,"Fraction of reads crossing site with spanning deletions in either sample exceeeds $snvMaxSpanningDeletionFrac"); + add_vcf_filter($vcf,$qFiltId,"Normal sample is not homozygous ref or ssnv Q-score < $minQSSNT, ie calls with NT!=ref or QSS_NT < $minQSSNT"); + print $aFH $vcf->format_header(); + $is_first=0; + } + + while(my $x=$vcf->next_data_hash()) { + my $norm=$x->{gtypes}->{NORMAL}; + my $tumr=$x->{gtypes}->{TUMOR}; + + my %filters; + + # normal depth filter: + my $normalDP=$norm->{DP}; + if($isUseDepthFilter) { + $filters{$depthFiltId} = ($normalDP > $depthFilterVal); + } + + # filtered basecall fraction: + my $normal_filt=($normalDP>0 ? $norm->{FDP}/$normalDP : 0); + + my $tumorDP=$tumr->{DP}; + my $tumor_filt=($tumorDP>0 ? $tumr->{FDP}/$tumorDP : 0); + + $filters{$baseFiltId}=(($normal_filt >= $snvMaxFilteredBasecallFrac) or + ($tumor_filt >= $snvMaxFilteredBasecallFrac)); + + # spanning deletion fraction: + my $normalSDP=$norm->{SDP}; + my $normalSpanTot=($normalDP+$normalSDP); + my $normalSpanDelFrac=($normalSpanTot>0 ? ($normalSDP/$normalSpanTot) : 0); + + my $tumorSDP=$tumr->{SDP}; + my $tumorSpanTot=($tumorDP+$tumorSDP); + my $tumorSpanDelFrac=($tumorSpanTot>0 ? ($tumorSDP/$tumorSpanTot) : 0); + + $filters{$spanFiltId}=(($normalSpanDelFrac > $snvMaxSpanningDeletionFrac) or + ($tumorSpanDelFrac > $snvMaxSpanningDeletionFrac)); + + # Q-val filter: + $filters{$qFiltId}=(($x->{INFO}->{NT} ne "ref") or + ($x->{INFO}->{QSS_NT} < $minQSSNT)); + + $x->{FILTER} = $vcf->add_filter($x->{FILTER},%filters); + + print $aFH $vcf->format_line($x); + } + + $vcf->close(); + close($iFH); + } + + close($aFH); +} + + + +sub updateA2(\@$) { + my ($a2,$FH) = @_; + my $line=<$FH>; + unless(defined($line)) { errorX("Unexpected end of somatic indel window file"); } + chomp $line; + @$a2 = split("\t",$line); + unless(scalar(@$a2)) { errorX("Unexpected format in somatic indel window file"); } +} + + + +sub filterIndelFileList(\@$$$) { + my ($ifiles,$depthFilterVal,$acceptFileName,$isUseDepthFilter) = @_; + + my $repeatFiltId="Repeat"; + my $iHpolFiltId="iHpol"; + my $baseFiltId="BCNoise"; + my $qFiltId="QSI_ref"; + + open(my $aFH,'>',$acceptFileName) + or errorX("Failed to open file: '$acceptFileName'"); + + my $is_first=1; + for my $ifile (@$ifiles) { + open(my $iFH,'<',"$ifile") + or errorX("Failed to open somatic indel file: '$ifile'"); + + my $iwfile = $ifile . ".window"; + open(my $iwFH,'<',"$iwfile") + or errorX("Failed to open somatic indel window file: '$iwfile'"); + + my @a2; # hold window file data for one line in case we overstep... + + my $vcf = Vcf->new(fh=>$iFH); + + # run some simple header validation on each vcf: + $vcf->parse_header(); + check_vcf_for_sample($vcf,'NORMAL',$ifile); + check_vcf_for_sample($vcf,'TUMOR',$ifile); + + if($is_first) { + # TODO: update all vcf meta-data for chromosome-level filtered files + # + $vcf->remove_header_line(key=>"cmdline"); + $vcf->add_header_line({key=>"cmdline",value=>$cmdline}); + if($isUseDepthFilter) { + $vcf->add_header_line({key=>"maxDepth_$chrom",value=>$depthFilterVal}); + } + $vcf->add_header_line({key=>'FORMAT', ID=>'DP50',Number=>1,Type=>'Float',Description=>'Average tier1 read depth within 50 bases'}); + $vcf->add_header_line({key=>'FORMAT', ID=>'FDP50',Number=>1,Type=>'Float',Description=>'Average tier1 number of basecalls filtered from original read depth within 50 bases'}); + $vcf->add_header_line({key=>'FORMAT', ID=>'SUBDP50',Number=>1,Type=>'Float',Description=>'Average number of reads below tier1 mapping quality threshold aligned across sites within 50 bases'}); + if($isUseDepthFilter) { + add_vcf_filter($vcf,$depthFiltId,"Greater than ${depthFilterMultiple}x chromosomal mean depth in Normal sample"); + } + add_vcf_filter($vcf,$repeatFiltId,"Sequence repeat of more than ${indelMaxRefRepeat}x in the reference sequence"); + add_vcf_filter($vcf,$iHpolFiltId,"Indel overlaps an interupted homopolymer longer than ${indelMaxIntHpolLength}x in the reference sequence"); + add_vcf_filter($vcf,$baseFiltId,"Average fraction of filtered basecalls within 50 bases of the indel exceeds $indelMaxWindowFilteredBasecallFrac"); + add_vcf_filter($vcf,$qFiltId,"Normal sample is not homozygous ref or sindel Q-score < $minQSINT, ie calls with NT!=ref or QSI_NT < $minQSINT"); + print $aFH $vcf->format_header(); + $is_first=0; + } + + while(my $x=$vcf->next_data_hash()) { + $vcf->add_format_field($x,'DP50'); + $vcf->add_format_field($x,'FDP50'); + $vcf->add_format_field($x,'SUBDP50'); + + my $norm=$x->{gtypes}->{NORMAL}; + my $tumr=$x->{gtypes}->{TUMOR}; + + my $chrom=$x->{CHROM}; + my $pos=int($x->{POS}); + + # get matching line from window file: + while((scalar(@a2)<2) or + (($a2[0] le $chrom) and (int($a2[1]) < $pos))) { + updateA2(@a2,$iwFH); + } + unless(scalar(@a2) and ($a2[0] eq $chrom) and (int($a2[1]) == $pos)) + { errorX("Can't find somatic indel window position.\nIndel line: " . $vcf->format_line($x) ); } + + # add window data to vcf record: + # + $norm->{DP50} = $a2[2]+$a2[3]; + $norm->{FDP50} = $a2[3]; + $norm->{SUBDP50} = $a2[4]; + $tumr->{DP50} = $a2[5]+$a2[6]; + $tumr->{FDP50} = $a2[6]; + $tumr->{SUBDP50} = $a2[7]; + + my %filters; + + # normal depth filter: + my $normalDP=$norm->{DP}; + if($isUseDepthFilter) { + $filters{$depthFiltId}=($normalDP > $depthFilterVal); + } + + # ref repeat + my $refRep=$x->{INFO}->{RC}; + $filters{$repeatFiltId}=(defined($refRep) and + ($refRep > $indelMaxRefRepeat)); + + # ihpol + my $iHpol=$x->{INFO}->{IHP}; + $filters{$iHpolFiltId}=(defined($iHpol) and + ($iHpol > $indelMaxIntHpolLength)); + + # base filt: + my $normWinFrac=( $norm->{DP50} ? $norm->{FDP50}/$norm->{DP50} : 0 ); + my $tumrWinFrac=( $tumr->{DP50} ? $tumr->{FDP50}/$tumr->{DP50} : 0 ); + $filters{$baseFiltId}=( ($normWinFrac >= $indelMaxWindowFilteredBasecallFrac) or + ($tumrWinFrac >= $indelMaxWindowFilteredBasecallFrac) ); + + # Q-val filter: + $filters{$qFiltId}=(($x->{INFO}->{NT} ne "ref") or + ($x->{INFO}->{QSI_NT} < $minQSINT)); + + $x->{FILTER} = $vcf->add_filter($x->{FILTER},%filters); + + print $aFH $vcf->format_line($x); + } + + $vcf->close(); + close($iFH); + close($iwFH); + } + + close($aFH); +} + + + +my @ssnvFiles; +my @sindelFiles; +for my $binId (@$binList) { + my $ssnvFile = File::Spec->catfile($chromDir,'bins',$binId,'somatic.snvs.unfiltered.vcf'); + my $sindelFile = File::Spec->catfile($chromDir,'bins',$binId,'somatic.indels.unfiltered.vcf'); + checkFile($ssnvFile,"bin snv file"); + checkFile($sindelFile,"bin indel file"); + push @ssnvFiles,$ssnvFile; + push @sindelFiles,$sindelFile; +} + +my $ssnvOutFile = File::Spec->catfile($chromDir,"somatic.snvs.vcf"); +filterSnvFileList(@ssnvFiles,$filterCoverage,$ssnvOutFile,$isUseDepthFilter); + +my $sindelOutFile = File::Spec->catfile($chromDir,"somatic.indels.vcf"); +filterIndelFileList(@sindelFiles,$filterCoverage,$sindelOutFile,$isUseDepthFilter); + + +1;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/strelka2/strelka.xml Thu Sep 25 11:59:08 2014 -0400 @@ -0,0 +1,35 @@ +<tool id="strelka2" name="Strelka 2" version="1.0.14"> + <requirements> + <requirement type="package" version="0.1.18">samtools</requirement> + <requirement type="package" version="0.1.11">vcftools</requirement> + </requirements> + <description>Strelka without dependencie built-in</description> + <command interpreter="python"> + strelka_wrapper.py + --tumorBam $tumorBam + --normalBam $normalBam + --refFile $refFile + --configFile $configFile + </command> + + <inputs> + <param format="bam" name="tumorBam" type="data" label="Tumor bam file"/> + <param format="bam" name="normalBam" type="data" label="Normal bam file"/> + <param format="fasta" name="refFile" type="data" label="ref fasta file"/> + <param format="ini" name="configFile" type="data" label="config file"/> + </inputs> + + <outputs> + <data format="vcf" name="output_vcf_1" label="${tool.name} on ${on_string} (passed.somatic.snvs.vcf)" from_work_dir="StrelkaAnalysis/results/passed.somatic.snvs.vcf" /> + <data format="vcf" name="output_vcf_2" label="${tool.name} on ${on_string} (passed.somatic.indels.vcf)" from_work_dir="StrelkaAnalysis/results/passed.somatic.indels.vcf" /> + <data format="vcf" name="output_vcf_3" label="${tool.name} on ${on_string} (all.somatic.snvs.vcf)" from_work_dir="StrelkaAnalysis/results/all.somatic.snvs.vcf" /> + <data format="vcf" name="output_vcf_4" label="${tool.name} on ${on_string} (all.somatic.indels.vcf)" from_work_dir="StrelkaAnalysis/results/all.somatic.indels.vcf" /> + </outputs> + <trackster_conf/> + + + <help> +Strelka, a method for somatic SNV and small indel detectipon from sequencing data of matched tumor-normal samples. + </help> + +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/strelka2/strelka2.xml Thu Sep 25 11:59:08 2014 -0400 @@ -0,0 +1,126 @@ +<tool id="strelka88" name="Strelka good interface but no dependencies"> + + <description>Strelka good interface but no dependencies</description> + <command interpreter="python">strelka_wrapper.py --tumorBam $tumorBam --normalBam $normalBam --refFile $refFile + #if $configuration.configuration_switch == 'Default': + --configFile Default + #else if $configuration.configuration_switch == 'Path': + --configFile $configuration.configFile + #else: + --configFile Custom + --depthFilterMultiple $configuration.depthFilterMultiple + --snvMaxFilteredBasecallFrac $configuration.snvMaxFilteredBasecallFrac + --snvMaxSpanningDeletionFrac $configuration.snvMaxSpanningDeletionFrac + --indelMaxRefRepeat $configuration.indelMaxRefRepeat + --indelMaxWindowFilteredBasecallFrac $configuration.indelMaxWindowFilteredBasecallFrac + --indelMaxIntHpolLength $configuration.indelMaxIntHpolLength + --ssnvPrior $configuration.ssnvPrior + --sindelPrior $configuration.sindelPrior + --ssnvNoise $configuration.ssnvNoise + --sindelNoise $configuration.sindelNoise + --ssnvNoiseStrandBiasFrac $configuration.ssnvNoiseStrandBiasFrac + --minTier1Mapq $configuration.minTier1Mapq + --minTier2Mapq $configuration.minTier2Mapq + --ssnvQuality_LowerBound $configuration.ssnvQuality_LowerBound + --sindelQuality_LowerBound $configuration.sindelQuality_LowerBound + --isWriteRealignedBam $configuration.isWriteRealignedBam + --binSize $configuration.binSize + --isSkipDepthFilters $configuration.isSkipDepthFilters + --maxInputDepth $configuration.maxInputDepth + #if $configuration.extra_arguments.extra_arguments_switch == 'Yes': + --extraStrelkaArguments $configuration.extra_arguments.extraStrelkaArguments + #end if + #end if + </command> + + <inputs> + <param format="bam" name="tumorBam" type="data" label="Tumor bam file"/> + <param format="bam" name="normalBam" type="data" label="Normal bam file"/> + <param format="fasta" name="refFile" type="data" label="ref fasta file"/> + + <conditional name="configuration"> + <param name="configuration_switch" type="select" label="how do you want to configure strelka"> + <option value="Default" selected="true">Default</option> + <option value="Path">Use a config file</option> + <option value="Custom">Custom</option> + </param> + <when value="Default"> + <!-- do nothing --> + </when> + <when value="Path"> + <param format="ini" name="configFile" type="data" label="config file"/> + </when> + <when value="Custom"> + <param name="binSize" type="integer" value="25000000" label="binSize" /> + <param name="minTier1Mapq" type="integer" value="20" min="0" max="40" help="between 0 and 40" label="minTier1Mapq" /> + <param name="minTier2Mapq" type="integer" value="5" min="0" max="5" help="between 0 and 5" label="minTier2Mapq" /> + <param name="isWriteRealignedBam" type="integer" value="0" label="isWriteRealignedBam" /> + <param name="ssnvPrior" type="float" value="0.000001" label="ssnvPrior" /> + <param name="sindelPrior" type="float" value="0.000001" label="sindelPrior" /> + <param name="ssnvNoise" type="float" value="0.0000005" label="ssnvNoise" /> + <param name="sindelNoise" type="float" value="0.000001" label="sindelNoise" /> + <param name="ssnvNoiseStrandBiasFrac" type="float" value="0.5" label="ssnvNoiseStrandBiasFrac" /> + <param name="ssnvQuality_LowerBound" type="integer" value="15" label="ssnvQuality_LowerBound" /> + <param name="sindelQuality_LowerBound" type="integer" value="30" label="sindelQuality_LowerBound" /> + <param name="isSkipDepthFilters" type="integer" value="1" label="isSkipDepthFilters" /> + <param name="depthFilterMultiple" type="float" value="3.0" label="depthFilterMultiple" /> + <param name="snvMaxFilteredBasecallFrac" type="float" value="0.4" label="snvMaxFilteredBasecallFrac" /> + <param name="snvMaxSpanningDeletionFrac" type="float" value="0.75" label="snvMaxSpanningDeletionFrac" /> + <param name="indelMaxRefRepeat" type="integer" value="8" label="indelMaxRefRepeat" /> + <param name="indelMaxWindowFilteredBasecallFrac" type="float" value="0.3" label="indelMaxWindowFilteredBasecallFrac" /> + <param name="indelMaxIntHpolLength" type="integer" value="14" label="indelMaxIntHpolLength" /> + <param name="maxInputDepth" type="integer" value="10000" label="maxInputDepth" /> + <conditional name="extra_arguments"> + <param name="extra_arguments_switch" type="select" label="Do you Want to add extraStrelkaArguments?"> + <option value="No" selected="true">No</option> + <option value="Yes">Yes</option> + </param> + <when value="No"> + <!-- do nothing --> + </when> + <when value="Yes"> + <param name="extraStrelkaArguments" type="text" value="" label="extraStrelkaArguments" /> + </when> + </conditional> + </when> + </conditional> + <conditional name="conf_file"> + <param name="conf_file_switch" type="select" label="output conf_file ?"> + <option value="No" selected="true">No</option> + <option value="Yes">Yes</option> + </param> + </conditional> + + + + + </inputs> + + <outputs> + <data format="vcf" name="output1_vcf" label="${tool.name} on ${on_string}(passed.somatic.snvs.vcf)" from_work_dir="StrelkaAnalysis/results/passed.somatic.snvs.vcf"/> + <data format="vcf" name="output2_vcf" label="${tool.name} on ${on_string}(passed.somatic.indels.vcf)" from_work_dir="StrelkaAnalysis/results/passed.somatic.indels.vcf"/> + <data format="vcf" name="output3_vcf" label="${tool.name} on ${on_string}(all.somatic.snvs.vcf)" from_work_dir="StrelkaAnalysis/results/all.somatic.snvs.vcf"/> + <data format="vcf" name="output4_vcf" label="${tool.name} on ${on_string}(all.somatic.indels.vcf)" from_work_dir="StrelkaAnalysis/results/all.somatic.indels.vcf"/> + <data name="conf_file.ini" label="conf_file.ini" from_work_dir="StrelkaAnalysis/tmp/config.ini"> + <filter>conf_file.conf_file_switch == "Yes"</filter> + </data> + </outputs> + + <tests> + <test> + <param name="normalBam" ftype="bam" value="NA12891_dupmark_chr20_region.bam"/> + <param name="tumorBam" ftype="bam" value="NA12892_dupmark_chr20_region.bam"/> + <param name="refFile" ftype="fasta" value="chr20_860k_only.fa"/> + <param name="configuration_switch" value="Default"/> + <output name="output1_vcf" file="passed.somatic.snvs.vcf"/> + <output name="output2_vcf" file="passed.somatic.indels.vcf"/> + <output name="output3_vcf" file="all.somatic.snvs.vcf"/> + <output name="output4_vcf" file="all.somatic.indels.vcf"/> + </test> + </tests> + + <help> +Strelka, a method for somatic SNV and small indel detectipon from sequencing data of matched tumor-normal samples. + </help> + +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/strelka2/strelka_config.sample Thu Sep 25 11:59:08 2014 -0400 @@ -0,0 +1,139 @@ + +; +; User configuration options for Strelka somatic small-variant caller +; workflow: +; + +[user] + +; +; isSkipDepthFilters should be set to 1 to skip depth filtration for +; whole exome or other targeted sequencing data +; +isSkipDepthFilters = 1 + +; +; strelka will not accept input reads above this depth (they will be skipped +; until the depth drops below this value). Set this value <= 0 to disable +; this feature. Using this filter will bound memory usage given extremely high +; depth input, but may be problematic in high-depth targeted sequencing +; applications. +; +maxInputDepth = 10000 + +; +; If the depth filter is not skipped, all variants which occur at a +; depth greater than depthFilterMultiple*chromosome mean depth will be +; filtered out. +; +depthFilterMultiple = 3.0 + +; +; Somatic SNV calls are filtered at sites where greater than this +; fraction of basecalls have been removed by the mismatch density +; filter in either sample. +; +snvMaxFilteredBasecallFrac = 0.4 + +; +; Somatic SNV calls are filtered at sites where greater than this +; fraction of overlapping reads contain deletions which span the SNV +; call site. +; +snvMaxSpanningDeletionFrac = 0.75 + +; +; Somatic indel calls are filtered if they represent an expansion or +; contraction of a repeated pattern with a repeat count greater than +; indelMaxRefRepeat in the reference (ie. if indelMaxRefRepeat is 8, +; then the indel is filtered when it is an expansion/contraction of a +; homopolymer longer than 8 bases, a dinucleotide repeat longer than +; 16 bases, etc.) +; +indelMaxRefRepeat = 8 + +; +; Somatic indel calls are filtered if greater than this fraction of +; basecalls in a window extending 50 bases to each side of an indel's +; call position have been removed by the mismatch density filter. +; +indelMaxWindowFilteredBasecallFrac = 0.3 + +; +; Somatic indels are filtered if they overlap ’interrupted +; homopolymers’ greater than this length. The term 'interrupted +; homopolymer' is used to indicate the longest homopolymer which can +; be found intersecting or adjacent to the called indel when a single +; non-homopolymer base is allowed. +; +indelMaxIntHpolLength = 14 + +; +; prior probability of a somatic snv or indel +; +ssnvPrior = 0.000001 +sindelPrior = 0.000001 + +; +; probability of an snv or indel noise allele +; +; NB: in the calling model a noise allele is shared in tumor and +; normal samples, but occurs at any frequency. +; +ssnvNoise = 0.0000005 +sindelNoise = 0.000001 + +; +; Fraction of snv noise attributed to strand-bias. +; +; It is not recommended to change this setting. However, if it is +; essential to turn the strand bias penalization off, the following is +; recommended: +; Assuming the current value of ssnvNoiseStrandBiasFrac is 0.5, +; (1) set ssnvNoiseStrandBiasFrac = 0 +; (2) divide the current ssnvNoise value by 2 +; +ssnvNoiseStrandBiasFrac = 0.5 + +; +; minimum MAPQ score for PE reads at tier1: +; +minTier1Mapq = 20 + +; +; minimum MAPQ score for PE and SE reads at tier2: +; +minTier2Mapq = 5 + +; +; Somatic quality score (QSS_NT, NT=ref) below which somatic SNVs are +; marked as filtered: +; +ssnvQuality_LowerBound = 15 + +; +; Somatic quality score (QSI_NT, NT=ref) below which somatic indels +; are marked as filtered: +; +sindelQuality_LowerBound = 30 + +; +; Optionally write out read alignments which were altered during the +; realignment step. At the completion of the workflow run, the +; realigned reads can be found in: +; +; ${ANALYSIS_DIR}/realigned/{normal,tumor}.realigned.bam +; +isWriteRealignedBam = 0 + +; +; Jobs are parallelized over segments of the reference genome no larger +; than this size: +; +binSize = 25000000 + +; +; Additional arguments passed to strelka. +; +extraStrelkaArguments = +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/strelka2/strelka_wrapper.py Thu Sep 25 11:59:08 2014 -0400 @@ -0,0 +1,201 @@ +#!/usr/bin/env python +#Dan Blankenberg + +""" +A wrapper script for running the GenomeAnalysisTK.jar commands. +""" + +from __future__ import print_function +import sys, argparse, os, tempfile, subprocess, shutil +from binascii import unhexlify +from string import Template +from galaxy import eggs +#import pkg_resources; pkg_resources.require( "bx-python" ) + +#GALAXY_EXT_TO_GATK_EXT = { 'gatk_interval':'intervals', 'bam_index':'bam.bai', 'gatk_dbsnp':'dbSNP', 'picard_interval_list':'interval_list' } #items not listed here will use the galaxy extension as-is +#GALAXY_EXT_TO_GATK_FILE_TYPE = GALAXY_EXT_TO_GATK_EXT #for now, these are the same, but could be different if needed +#DEFAULT_GATK_PREFIX = "gatk_file" +#CHUNK_SIZE = 2**20 #1mb +# +# +def cleanup_before_exit( tmp_dir ): + if tmp_dir and os.path.exists( tmp_dir ): + shutil.rmtree( tmp_dir ) + +def _create_config(args, config_path): + conf_file = open(config_path, "w") + conf_file.write("[user]\n") + for option in args: + if not option in ["tumorBam", "normalBam", "refFile", "configFile"] and args[option]!=None: + conf_file.write("%s=%s\n" % (option, args[option])) + conf_file.close() + +def my_Popen(cmd, prefix_for_stderr_name, tmp_dir, msg_error): + stderr_name = tempfile.NamedTemporaryFile( prefix = prefix_for_stderr_name ).name + proc = subprocess.Popen( args=cmd, shell=True, stderr=open( stderr_name, 'wb' ) ) + return_code = proc.wait() + if return_code: + for line in open( stderr_name ): + print(line, file=sys.stderr) + os.unlink( stderr_name ) #clean up + cleanup_before_exit( tmp_dir ) + raise Exception( msg_error ) + else: + os.unlink( stderr_name ) + +def index_bam_files( bam_filenames, tmp_dir ): + for bam_filename in bam_filenames: + bam_index_filename = "%s.bai" % bam_filename + print("bam_filename is: " + bam_filename + " bam_index_filename is: " + bam_index_filename + " test is: %s" % os.path.exists(bam_index_filename)) + if not os.path.exists( bam_index_filename ): + #need to index this bam file + command = 'samtools index %s %s' % ( bam_filename, bam_index_filename ) + my_Popen( command, "bam_index_stderr", tmp_dir, "Error during indexation of fasta file :" + bam_filename) + +def index_fasta_files( fasta_filenames, tmp_dir ): + for fasta_filename in fasta_filenames: + fasta_index_filename = "%s.fai" % fasta_filename + print("fasta_filename is: " + fasta_filename + " fasta_index_filename is: " + fasta_index_filename + " test is: %s" % os.path.exists(fasta_index_filename)) + if not os.path.exists( fasta_index_filename ): + #need to index this bam file + command = 'samtools faidx %s %s' % ( fasta_filename, fasta_index_filename ) + my_Popen( command, "fasta_index_stderr", tmp_dir, "Error during indexation of fasta file :" + fasta_filename) + +def __main__(): + #Parse Command Line OPTPARSE DEPRECIATED USE ARGPARSE INSTEAD + #MKTEMP DEPRECIATED USE MKDTlizations#EMP INSTEAD + + root_dir= "/home/galaxyusr/data/galaxy_dist/tools/strelka2" + expected_dir="for_tests" + job_dir=os.getcwd() + analysis_dir=job_dir + "/StrelkaAnalysis" + config_script=root_dir + "/configureStrelkaWorkflow.pl" + tmp_dir = "tmp" #tempfile.mkdtemp( prefix='tmp-strelkaAnalysis-' ) + config_ini = "%s/config.ini" % (tmp_dir) + + print("root_dir: " + root_dir + "\njob_dir :" + job_dir + "\nanalysis_dir :" + analysis_dir + "\nconfig_script :" + config_script + "\ntmp_dir :" + tmp_dir + "\nconfig_ini :" + config_ini) + + #manage parsing + parser = argparse.ArgumentParser() + parser.add_argument( '-t', '--tumorBam', help='path to tumor bam file', required = False ) + parser.add_argument( '-n', '--normalBam', help='path to tumor bam file', required = False ) + parser.add_argument( '-r', '--refFile', help='path to tumor bam file', required = False ) + parser.add_argument( '-c', '--configFile', help='path to tumor bam file', required = False ) + parser.add_argument( '--depthFilterMultiple', help='path to tumor bam file', required = False ) + parser.add_argument( '--snvMaxFilteredBasecallFrac', help='path to tumor bam file', required = False ) + parser.add_argument( '--snvMaxSpanningDeletionFrac', help='path to tumor bam file', required = False ) + parser.add_argument( '--indelMaxRefRepeat', help='path to tumor bam file', required = False ) + parser.add_argument( '--indelMaxWindowFilteredBasecallFrac', help='path to tumor bam file', required = False ) + parser.add_argument( '--indelMaxIntHpolLength', help='path to tumor bam file', required = False ) + parser.add_argument( '--ssnvPrior', help='path to tumor bam file', required = False ) + parser.add_argument( '--sindelPrior', help='path to tumor bam file', required = False ) + parser.add_argument( '--ssnvNoise', help='path to tumor bam file', required = False ) + parser.add_argument( '--sindelNoise', help='path to tumor bam file', required = False ) + parser.add_argument( '--ssnvNoiseStrandBiasFrac', help='path to tumor bam file', required = False ) + parser.add_argument( '--minTier1Mapq', help='path to tumor bam file', required = False ) + parser.add_argument( '--minTier2Mapq', help='path to tumor bam file', required = False ) + parser.add_argument( '--ssnvQuality_LowerBound', help='path to tumor bam file', required = False ) + parser.add_argument( '--sindelQuality_LowerBound', help='path to tumor bam file', required = False ) + parser.add_argument( '--isWriteRealignedBam', help='path to tumor bam file', required = False ) + parser.add_argument( '--binSize', help='path to tumor bam file', required = False ) + parser.add_argument( '--extraStrelkaArguments', help='path to tumor bam file', required = False ) + parser.add_argument( '--isSkipDepthFilters', help='path to tumor bam file', required = False ) + parser.add_argument( '--maxInputDepth', help='path to tumor bam file', required = False ) + args = parser.parse_args() + + #verifying eveything's ok + if not os.path.isfile(config_script): + sys.exit("ERROR: The strelka workflow must be built prior to running. See installation instructions in '$root_dir/README'") + print("configuring...", file=sys.stdout) + if os.path.exists(analysis_dir): + sys.exit("'" + analysis_dir + "' already exist, if you are executing this tool from galaxy it should not happen") + + + # creating index if needed + os.environ['PATH']= root_dir + "/opt/samtools:" + os.environ['PATH'] + bam_filenames = [ args.tumorBam, args.normalBam ] + index_bam_files( bam_filenames, tmp_dir ) + fasta_files = [ args.refFile ] + index_fasta_files( fasta_files, tmp_dir ) + + #creating config file if needed + if args.configFile == "Custom": + _create_config(vars(args), config_ini) + elif args.configFile == "Default": + cmdbash="cp %s %s" % (root_dir + "/strelka_config.sample", config_ini) + my_Popen(cmdbash, "copy_default_file_err", tmp_dir, "Error during the copy of default config file, maybe it was removed") + else: + if not os.path.exists(args.configFile): + print( "The path to your configuration File seems to be wrong, use another one or custom option", file=sys.stderr) + cmdbash="cp %s %s" % (args.configFile, config_ini) + my_Popen(cmdbash, "copy_default_file_err", tmp_dir, "Error during the copy of default config file, maybe it was removed") + + + + + #configuration of workflow + cmd="%s --tumor=%s --normal=%s --ref=%s --config=%s --output-dir=%s" % (config_script, args.tumorBam, args.normalBam, args.refFile, config_ini, analysis_dir) + print( "**** Starting configuration.") + print( "**** Configuration cmd: '" + cmd + "'") + my_Popen( cmd, "cinfugation_stderr", tmp_dir, "Error during configuration !") + print("completed configuration") + + #run the workflow ! + cmd="make -C " + analysis_dir + print("**** starting workflow.") + print("**** workflow cmd: '" + cmd + "'") + my_Popen( cmd, "workflow_stderr", tmp_dir, "Error during workflow execution !") + print("**** completed workflow execution") + + + + + + + + + + + + + + +#bam_filenames = [] +# if options.datasets: +# for ( dataset_arg, filename, galaxy_ext, prefix ) in options.datasets: +# gatk_filename = filename_from_galaxy( filename, galaxy_ext, target_dir = tmp_dir, prefix = prefix )#return the link to the dataset that has been created in the function +# if dataset_arg: +# cmd = '%s %s "%s"' % ( cmd, gatk_filetype_argument_substitution( dataset_arg, galaxy_ext ), gatk_filename ) +# if galaxy_ext == "bam": +# bam_filenames.append( gatk_filename ) +# #set up stdout and stderr output options +# stdout = open_file_from_option( options.stdout, mode = 'wb' ) +# stderr = open_file_from_option( options.stderr, mode = 'wb' ) +# #if no stderr file is specified, we'll use our own +# if stderr is None: +# stderr = tempfile.NamedTemporaryFile( prefix="strelka-stderr-", dir=tmp_dir ) +# +# proc = subprocess.Popen( args=cmd, stdout=stdout, stderr=stderr, shell=True, cwd=tmp_dir ) +# return_code = proc.wait() +# +# if return_code: +# stderr_target = sys.stderr +# else: +# stderr_target = sys.stdout +# stderr.flush() +# stderr.seek(0) +# while True: +# chunk = stderr.read( CHUNK_SIZE ) +# if chunk: +# stderr_target.write( chunk ) +# else: +# break +# stderr.close() +# #generate html reports +# if options.html_report_from_directory: +# for ( html_filename, html_dir ) in options.html_report_from_directory: +# html_report_from_directory( open( html_filename, 'wb' ), html_dir ) +# +# cleanup_before_exit( tmp_dir ) + +if __name__=="__main__": __main__()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/strelka2/tool_dependencies.xml Thu Sep 25 11:59:08 2014 -0400 @@ -0,0 +1,9 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="samtools" version="0.1.18"> + <repository changeset_revision="c0f72bdba484" name="package_samtools_0_1_18" owner="devteam" toolshed="http://toolshed.g2.bx.psu.edu" /> + </package> + <package name="vcftools" version="0.1.11"> + <repository changeset_revision="710efaae2ff8" name="package_vcftools_0_1_11" owner="devteam" toolshed="http://toolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency>