# HG changeset patch
# User mini
# Date 1411724094 14400
# Node ID 7c35d3efd1e75e029e8d00cdfd06623d8e43f424
# Parent f48854499d41f09fb85b6e6bc793888f989d4968
Deleted selected files
diff -r f48854499d41 -r 7c35d3efd1e7 strelka2/.configureStrelkaWorkflow.pl.swp
Binary file strelka2/.configureStrelkaWorkflow.pl.swp has changed
diff -r f48854499d41 -r 7c35d3efd1e7 strelka2/.strelka.xml.swp
Binary file strelka2/.strelka.xml.swp has changed
diff -r f48854499d41 -r 7c35d3efd1e7 strelka2/.strelka_wrapper.bash.swo
Binary file strelka2/.strelka_wrapper.bash.swo has changed
diff -r f48854499d41 -r 7c35d3efd1e7 strelka2/.strelka_wrapper.py.swp
Binary file strelka2/.strelka_wrapper.py.swp has changed
diff -r f48854499d41 -r 7c35d3efd1e7 strelka2/configureStrelkaWorkflow.pl
--- a/strelka2/configureStrelkaWorkflow.pl Thu Sep 25 12:02:14 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,564 +0,0 @@
-#!/usr/bin/env perl
-
-=head1 LICENSE
-
-Strelka Workflow Software
-Copyright (c) 2009-2013 Illumina, Inc.
-
-This software is provided under the terms and conditions of the
-Illumina Open Source Software License 1.
-
-You should have received a copy of the Illumina Open Source
-Software License 1 along with this program. If not, see
-.
-
-=head1 SYNOPSIS
-
-configureStrelkaWorkflow.pl --tumor FILE --normal FILE --ref FILE --config FILE [options]
-
-This script configures the strelka workflow for somatic variant
-calling on matched tumor-normal BAM files. The configuration process
-will produce an analysis makefile and directory structure. The
-makefile can be used to run the analysis on a workstation or compute
-cluster via make/qmake or other makefile compatible process.
-
-=head1 ARGUMENTS
-
-=over 4
-
-=item --tumor FILE
-
-Path to tumor sample BAM file (required)
-
-=item --normal FILE
-
-Path to normal sample BAM file (required)
-
-=item --ref FILE
-
-Path to reference genome fasta (required)
-
-=item --config FILE
-
-Strelka configuration file. Default config files can be found in
-${STRELKA_INSTALL_ROOT}/etc/ for both ELAND and BWA alignments. (required)
-
-=back
-
-=head1 OPTIONS
-
-=over 4
-
-=item --output-dir DIRECTORY
-
-Root of the analysis directory. This script will place all
-configuration files in the analysis directory, and after configuration
-all results and intermediate files will be written within the analysis
-directory during a run. This directory must not already
-exist. (default: ./strelkaAnalysis)
-
-=back
-
-=cut
-
-use warnings FATAL => 'all';
-use strict;
-
-use Carp;
-$SIG{__DIE__} = \&Carp::confess;
-
-use Cwd qw(getcwd);
-use File::Spec;
-use Getopt::Long;
-use Pod::Usage;
-
-my $baseDir;
-my $libDir;
-BEGIN {
- my $thisDir=(File::Spec->splitpath($0))[1];
- $baseDir=$thisDir;#$baseDir=File::Spec->catdir($thisDir,File::Spec->updir());
- $libDir=File::Spec->catdir($baseDir,'lib');
-}
-use lib $libDir;
-use Utils;
-
-if(getAbsPath($baseDir)) {
- errorX("Can't resolve path for strelka_workflow install directory: '$baseDir'");
-}
-my $libexecDir=File::Spec->catdir($baseDir,'libexec');
-#my $optDir=File::Spec->catdir($baseDir,'opt');
-
-
-my $scriptName=(File::Spec->splitpath($0))[2];
-my $argCount=scalar(@ARGV);
-my $cmdline = join(' ',$0,@ARGV);
-
-
-sub usage() { pod2usage(-verbose => 1,
- -exitval => 2); }
-
-#
-# user configuration:
-#
-
-my ($tumorBam, $normalBam, $refFile, $configFile, $outDir);
-my $help;
-
-GetOptions( "tumor=s" => \$tumorBam,
- "normal=s" => \$normalBam,
- "ref=s" => \$refFile,
- "config=s" => \$configFile,
- "output-dir=s" => \$outDir,
- "help|h" => \$help) || usage();
-
-usage() if($help);
-usage() unless($argCount);
-
-
-#
-# Validate input conditions:
-#
-
-sub checkFileArg($$) {
- my ($file,$label) = @_;
-
- errorX("Must specify $label file") unless(defined($file)); #raise an error if file not defined
- checkFile($file,$label);
-}
-
-checkFileArg($tumorBam,"tumor BAM");
-checkFileArg($normalBam,"normal BAM");
-checkFileArg($refFile,"reference fasta");
-checkFileArg($configFile,"configuration ini");
-
-sub makeAbsoluteFilePaths(\$) {
- my ($filePathRef) = @_;
-
- my ($v,$fileDir,$fileName) = File::Spec->splitpath($$filePathRef);
- if(getAbsPath($fileDir)) {
- errorX("Can't resolve directory path for '$fileDir' from input file argument: '$$filePathRef'");
- }
- $$filePathRef = File::Spec->catfile($fileDir,$fileName);
-}
-
-makeAbsoluteFilePaths($tumorBam);
-makeAbsoluteFilePaths($normalBam);
-makeAbsoluteFilePaths($refFile);
-makeAbsoluteFilePaths($configFile);
-
-# also check for BAM index files:
-sub checkBamIndex($) {
- my ($file) = @_;
- my $ifile = $file . ".bai";
- if(! -f $ifile) {
- errorX("Can't find index for BAM file '$file'");
- }
-}
-
-checkBamIndex($tumorBam);
-checkBamIndex($normalBam);
-
-
-sub checkFaIndex($) {
- my ($file) = @_;
- my $ifile = $file . ".fai";
- if(! -f $ifile) {
- errorX("Can't find index for fasta file '$file'");
- }
- # check that fai file isn't improperly formatted (a la the GATK bundle NCBI 37 fai files)
- open(my $FH,"< $ifile") || errorX("Can't open fai file '$ifile'");
- my $lineno=1;
- while(<$FH>) {
- chomp;
- my @F=split();
- if(scalar(@F) != 5) {
- errorX("Unexpected format for line number '$lineno' of fasta index file: '$ifile'\n\tRe-running fasta indexing may fix the issue. To do so, run: \"samtools faidx $file\"");
- }
- $lineno++;
- }
- close($FH);
-}
-
-checkFaIndex($refFile);
-
-
-if(defined($outDir)) {
- if(getAbsPath($outDir)) {
- errorX("Can't resolve path for ouput directory: '$outDir'");
- }
-} else {
- $outDir=File::Spec->catdir(Cwd::getcwd(),'strelkaAnalysis');
-}
-
-if(-e $outDir) {
- errorX("Output path already exists: '$outDir'");
-}
-
-if(getAbsPath($baseDir)) {
- errorX("Can't resolve path for strelka install directory: '$baseDir'");
-}
-
- #my $samtoolsDir = File::Spec->catdir($optDir,'samtools');
-checkDir($libexecDir,"strelka libexec");
- #checkDir($samtoolsDir,"samtools");
-
-my $callScriptName = "callSomaticVariants.pl";
-my $filterScriptName = "filterSomaticVariants.pl";
-my $finishScriptName = "consolidateResults.pl";
-my $callScript = File::Spec->catfile($libexecDir,$callScriptName);
-my $filterScript = File::Spec->catfile($libexecDir,$filterScriptName);
-my $finishScript = File::Spec->catfile($libexecDir,$finishScriptName);
-my $countFasta = File::Spec->catfile($libexecDir,"countFastaBases");
- #my $samtoolsBin = File::Spec->catfile($samtoolsDir,"samtools");
-checkFile($callScript,"somatic variant call script");
-checkFile($filterScript,"somatic variant filter script");
-checkFile($finishScript,"result consolidation script");
-checkFile($countFasta,"fasta scanner");
-#checkFile($samtoolsBin,"samtools");
-
-#
-# Configure bin runs:
-#
-checkMakeDir($outDir);
-
-#
-# Configure bin runs: open and validate config ini
-#
-my $config = parseConfigIni($configFile);
-
-sub checkConfigKeys($) {
- my ($keyref) = @_;
- for (@$keyref) {
- errorX("Undefined configuration option: '$_'") unless(defined($config->{user}{$_}));
- }
-}
-
-# these are the keys we need at configuration time:
-my @config_keys = qw(binSize);
-
-# these are additional keys we will need to run the workflow:
-# (note we don't check for (maxInputDepth,minTier2Mapq) for back compatibility with older config files)
-my @workflow_keys = qw(
-minTier1Mapq isWriteRealignedBam ssnvPrior sindelPrior ssnvNoise sindelNoise ssnvNoiseStrandBiasFrac
-ssnvQuality_LowerBound sindelQuality_LowerBound isSkipDepthFilters depthFilterMultiple
-snvMaxFilteredBasecallFrac snvMaxSpanningDeletionFrac indelMaxRefRepeat
-indelMaxWindowFilteredBasecallFrac indelMaxIntHpolLength);
-
-checkConfigKeys(\@config_keys);
-checkConfigKeys(\@workflow_keys);
-
-
-my $binSize = int($config->{user}{binSize});
-
-$config->{derived}{configurationCmdline} = $cmdline;
-$config->{derived}{normalBam} = $normalBam;
-$config->{derived}{tumorBam} = $tumorBam;
-$config->{derived}{refFile} = $refFile;
-$config->{derived}{outDir} = $outDir;
-
-#
-# Configure bin runs: check for consistent chrom info between BAMs and reference
-#
-sub getBamChromInfo($) {
- my $file = shift;
- my $cmd = "samtools view -H $file |";
- open(my $FH,$cmd) || errorX("Can't open process $cmd");
-
- my %info;
- my $n=0;
- while(<$FH>) {
- next unless(/^\@SQ/);
- chomp;
- my @F = split(/\t/);
- scalar(@F) >= 3 || errorX("Unexpected bam header for file '$file'");
-
- my %h = ();
- foreach (@F) {
- my @vals = split(':');
- $h{$vals[0]} = $vals[1];
- }
- $F[1] = $h{'SN'};
- $F[2] = $h{'LN'};
-
- my $size = int($F[2]);
- ($size > 0) || errorX("Unexpected chromosome size '$size' in bam header for file '$file'");
- $info{$F[1]}{size} = $size;
- $info{$F[1]}{order} = $n;
- $n++;
- }
- close($FH) || errorX("Can't close process $cmd");
- return %info;
-}
-
-
-my %chromInfo = getBamChromInfo($normalBam);
-my @chroms = sort { $chromInfo{$a}{order} <=> $chromInfo{$b}{order} } (keys(%chromInfo));
-
-{
- #consistency check:
- my %tumorChromInfo = getBamChromInfo($tumorBam);
- for my $chrom (@chroms) {
- my $ln = $chromInfo{$chrom}{size};
- my $tln = $tumorChromInfo{$chrom}{size};
- my $order = $chromInfo{$chrom}{order};
- my $torder = $tumorChromInfo{$chrom}{order};
- unless(defined($tln) && ($tln==$ln) && ($torder==$order)) {
- errorX("Tumor and normal BAM file headers disagree on chromosome: '$chrom'");
- }
- delete $tumorChromInfo{$chrom};
- }
- for my $chrom (keys(%tumorChromInfo)) {
- errorX("Tumor and normal BAM file headers disagree on chromosome: '$chrom'");
- }
-}
-
-
-my %refChromInfo;
-logX("Scanning reference genome");
-{
- my $knownGenomeSize=0;
- my $cmd="$countFasta $refFile |";
- open(my $FFH,$cmd) || errorX("Failed to open process '$cmd'");
-
- while(<$FFH>) {
- chomp;
- my @F = split(/\t/);
- scalar(@F) == 4 || errorX("Unexpected value from '$cmd'");
- $knownGenomeSize += int($F[2]);
- $refChromInfo{$F[1]}{knownSize} = int($F[2]);
- $refChromInfo{$F[1]}{size} = int($F[3]);
- }
- close($FFH) || errorX("Failed to close process '$cmd'");
-
- #consistency check:
- for my $chrom (@chroms) {
- my $ln = $chromInfo{$chrom}{size};
- my $rln = $refChromInfo{$chrom}{size};
- unless(defined($rln) && ($rln==$ln)) {
- errorX("BAM headers and reference fasta disagree on chromosome: '$chrom'");
- }
- $config->{derived}{"chrom_${chrom}_size"} = $rln;
- $config->{derived}{"chrom_${chrom}_knownSize"} = $refChromInfo{$chrom}{knownSize};
- }
- $config->{derived}{chromOrder} = join("\t",@chroms);
-
- $config->{derived}{knownGenomeSize} = $knownGenomeSize;
-}
-logX("Scanning reference genome complete");
-
-
-
-#
-# Configure bin runs: create directory structure
-#
-my $resultsDir = File::Spec->catdir($outDir,'results');
-checkMakeDir($resultsDir);
-if($config->{user}{isWriteRealignedBam}) {
- my $bamDir = File::Spec->catdir($outDir,'realigned');
- checkMakeDir($bamDir);
-}
-my $chromRootDir = File::Spec->catdir($outDir,'chromosomes');
-checkMakeDir($chromRootDir);
-for my $chrom (@chroms) {
- my $chromDir = File::Spec->catdir($chromRootDir,$chrom);
- checkMakeDir($chromDir);
-
- my $chromRef = $chromInfo{$chrom};
- $chromRef->{dir} = $chromDir;
- $chromRef->{binList} = getBinList($chromRef->{size},$binSize);
-
- my $binRootDir = File::Spec->catdir($chromDir,'bins');
- checkMakeDir($binRootDir);
-
- for my $binId ( @{$chromRef->{binList}} ) {
- my $binDir = File::Spec->catdir($binRootDir,$binId);
- checkMakeDir($binDir);
- }
-}
-
-
-
-#
-# write run config file:
-#
-my $runConfigFile;
-{
- my $cstr = <catdir($outDir,'config');
- checkMakeDir($configDir);
- $runConfigFile = File::Spec->catdir($configDir,'run.config.ini');
- open(my $FH,"> $runConfigFile") || errorX("Can't open file '$runConfigFile'");
- print $FH $cstr;
- close($FH);
-}
-
-
-
-#
-# create makefile
-#
-my $makeFile = File::Spec->catfile($outDir,"Makefile");
-open(my $MAKEFH, "> $makeFile") || errorX("Can't open file: '$makeFile'");
-
-my $completeFile = "task.complete";
-
-print $MAKEFH <.
-
-=cut
-
-
-package Utils;
-
-use base 'Exporter';
-
-our @EXPORT = qw(
- errorX logX executeCmd checkFile checkDir checkMove
- getAbsPath checkMakeDir getBinList
- parseConfigIni writeConfigIni
- );
-
-use warnings FATAL => 'all';
-use strict;
-
-use Carp;
-use Cwd qw(realpath);
-use File::Copy qw(move);
-use File::Path qw(mkpath);
-
-
-sub errorX($) {
- confess "\nERROR: " . $_[0] . "\n\n";
-}
-
-sub logX($) {
- print STDERR "INFO: " . $_[0] . "\n";
-}
-
-
-sub executeCmd($;$) {
- my $cmd = shift;
- my $isVerbose = shift;
-
- logX("Running: '$cmd'") if(defined($isVerbose) and $isVerbose);
- system($cmd) == 0
- or errorX("Failed system call: '$cmd'");
-}
-#return an error if file does not exist
-sub checkFile($;$) {
- my $file = shift;
- return if(-f $file);
- my $label = shift;
- errorX("Can't find" . (defined($label) ? " $label" : "") . " file: '$file'");
-}
-#return an error if file does not Exist
-sub checkDir($;$) {
- my $dir = shift;
- return if(-d $dir);
- my $label = shift;
- errorX("Can't find" . (defined($label) ? " $label" : "") . " directory: '$dir'");
-}
-
-sub checkMove($$) {
- my ($old,$new) = @_;
- move($old,$new) || errorX("File move failed: $!\n\tAttempting to move '$old' to '$new'");
-}
-
-
-
-
-=item getAbsPath($path)
-
-This procedure attempts to convert a path provided by the user on the
-command line into an absolute path. It should be able to handle "~"
-paths and conventional relative paths using ".." or ".". Resolution of
-links should follow the convention of "Cwd::realpath".
-
-B
-
- $dirRef - path (converted to absolute path in place)
-
-B
-
- returns zero if successful, non-zero otherwise.
-
-=cut
-sub getAbsPath(\$) {
- my ($dirRef) = @_;
- my @tmp=glob($$dirRef);
- return 1 if(scalar(@tmp) != 1);
- my $ret = Cwd::realpath($tmp[0]);
- return 1 if !$ret && !($ret = File::Spec->rel2abs($tmp[0]));
- $$dirRef = $ret;
- return 0;
-}
-
-
-#verify path is not a file, then create a directory with this name if does not exist
-sub checkMakeDir($) {
- my $dir = shift;
- unless (-e $dir) {
- File::Path::mkpath($dir) || errorX("Can't create directory '$dir'");
- } else {
- errorX("Path is not a directory '$dir'\n") unless(-d $dir);
- }
-}
-
-
-
-sub getBinList($$) {
- my ($chromSize,$binSize) = @_;
-
- my $nm1 = (($chromSize-1) / $binSize);
- return [ map {sprintf("%04i",$_)} (0..$nm1) ];
-}
-
-
-
-sub parseConfigError($$) {
- my ($file,$line) = @_;
- errorX("Config file '$file' contains unexpected line '$line'\n");
-}
-
-#lis le fichier de config, si la ligne est de type : some space + [ some character ] + some space then register ther character in $section. ( [user] , then $section=user).
-sub parseConfigIni($) {
- my $file = shift;
- my %config;
- open(my $FH,"< $file") || errorX("Can't open config file '$file'");
- my $section = "noSection";
- while(<$FH>) {
- next if(/^[;#]/);
- next if(/^\s*$/);
- chomp;
- my $line=$_;
- my @ncl = split(/[;#]/);
- next unless(scalar(@ncl));
- my $nc = $ncl[0];
- if($nc =~ /^\s*\[([^\]]*)\]\s*$/) {
- $section = $1;
- next;
- }
- my ($key,$val) = map { s/^\s+//; s/\s+$//; $_ } split(/=/,$nc,2);
- unless(defined($key) && defined($val) && ($key ne "")) { parseConfigError($file,$line); }
-
- $config{$section}{$key} = $val;
- }
- close($FH);
- return \%config;
-}
-
-
-# minimal ini stringifier:
-#
-sub writeConfigIni($) {
- my $config = shift;
- my $val = "";
- for my $section (sort(keys(%$config))) {
- $val .= "\n[$section]\n";
- for my $key (sort(keys(%{$config->{$section}}))) {
- $val .= "$key = " . $config->{$section}{$key} . "\n";
- }
- }
- return $val;
-}
-
-
-1;
diff -r f48854499d41 -r 7c35d3efd1e7 strelka2/libexec/callSomaticVariants.pl
--- a/strelka2/libexec/callSomaticVariants.pl Thu Sep 25 12:02:14 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,241 +0,0 @@
-#!/usr/bin/env perl
-print "\n";
-print @INC;
-print "\n";
-=head1 LICENSE
-
-Strelka Workflow Software
-Copyright (c) 2009-2013 Illumina, Inc.
-
-This software is provided under the terms and conditions of the
-Illumina Open Source Software License 1.
-
-You should have received a copy of the Illumina Open Source
-Software License 1 along with this program. If not, see
-.
-
-=head1 SYNOPSIS
-
-callSomaticVariants.pl [options] | --help
-
-=head2 SUMMARY
-
-Run the somatic variant caller for snvs and indels on a single
-chromosome bin.
-
-=cut
-
-use warnings FATAL => 'all';
-use strict;
-
-use Carp;
-$SIG{__DIE__} = \&Carp::confess;
-
-use File::Spec;
-use Getopt::Long;
-use Pod::Usage;
-
-my $baseDir;
-my $libDir;
-BEGIN {
-print "\n";
-print @INC;
-print "\n";
-
- my $thisDir=(File::Spec->splitpath($0))[1];
- $baseDir=File::Spec->catdir($thisDir,File::Spec->updir());
- $libDir=File::Spec->catdir($baseDir,'lib');
-}
-use lib $libDir;
-use Utils;
-
-if(getAbsPath($baseDir)) {
- errorX("Can't resolve path for strelka_workflow install directory: '$baseDir'");
-}
-my $libexecDir=File::Spec->catdir($baseDir,'libexec');
-#my $optDir=File::Spec->catdir($baseDir,'opt');
-
-
-my $scriptName=(File::Spec->splitpath($0))[2];
-my $argCount=scalar(@ARGV);
-my $cmdline = join(' ',$0,@ARGV);
-
-
-my ($chrom, $binId, $configFile);
-my $help;
-
-GetOptions(
- "chrom=s" => \$chrom,
- "bin=s" => \$binId,
- "config=s" => \$configFile,
- "help|h" => \$help) or pod2usage(2);
-
-pod2usage(2) if($help);
-pod2usage(2) unless(defined($chrom));
-pod2usage(2) unless(defined($binId));
-pod2usage(2) unless(defined($configFile));
-
-
-
-#
-# check all fixed paths (not based on commandline arguments):
-#
-checkDir($baseDir);
-checkDir($libexecDir);
-#checkDir($optDir);
-
-my $strelkaBin=File::Spec->catdir($libexecDir,'strelka2');
-checkFile($strelkaBin,"strelka binary");
-#my $samtoolsBin = File::Spec->catfile($optDir,'samtools','samtools');
-#checkFile($samtoolsBin,"samtools binary");
-
-
-
-#
-# read config and validate values
-#
-checkFile($configFile,"configuration ini");
-my $config = parseConfigIni($configFile);
-
-for (qw(knownGenomeSize tumorBam normalBam refFile outDir)) {
- errorX("Undefined configuration option: '$_'") unless(defined($config->{derived}{$_}));
-}
-
-# we skip maxInputDepth,minTier2Mapq here to allow older config files
-for (qw(minTier1Mapq isWriteRealignedBam binSize ssnvPrior sindelPrior
- ssnvNoise sindelNoise ssnvNoiseStrandBiasFrac)) {
- errorX("Undefined configuration option: '$_'") unless(defined($config->{user}{$_}));
-}
-
-my $outDir = $config->{derived}{outDir};
-my $binDir = File::Spec->catdir($outDir,'chromosomes',$chrom,'bins',$binId);
-checkDir($outDir,"output");
-checkDir($binDir,"output bin");
-
-
-my $tumorBam = $config->{derived}{tumorBam};
-my $normalBam = $config->{derived}{normalBam};
-my $refFile = $config->{derived}{refFile};
-checkFile($tumorBam,"tumor BAM");
-checkFile($normalBam,"normal BAM");
-checkFile($refFile,"reference");
-
-
-# pull out some config options for convenience:
-my $binSize=$config->{user}{binSize};
-my $isWriteRealignedBam=$config->{user}{isWriteRealignedBam};
-my $knownGenomeSize = $config->{derived}{knownGenomeSize};
-
-
-my $begin = (int($binId)*$binSize)+1;
-my $end = ((int($binId)+1)*$binSize);
-#my $end = $begin+100000; #debug mode
-
-my $useroptions = $config->{user};
-
-# set previous default value if an older config file is being used:
-if(! defined($useroptions->{minTier2Mapq})) {
- $useroptions->{minTier2Mapq} = 5;
-}
-
-
-#
-# setup the strelka command-line:
-#
-my $strelka_base_opts= "-clobber" .
-" -filter-unanchored" .
-" -min-paired-align-score " . $useroptions->{minTier1Mapq} .
-" -min-single-align-score 10" .
-" -min-qscore 0" .
-" -report-range-begin $begin -report-range-end $end" .
-" -samtools-reference '$refFile'" .
-" -max-window-mismatch 3 20 -print-used-allele-counts" .
-" -bam-seq-name '" . $chrom . "'" .
-" -genome-size $knownGenomeSize" .
-" -max-indel-size 50" .
-" -indel-nonsite-match-prob 0.5" .
-" --min-contig-open-end-support 35" .
-" --somatic-snv-rate " . $useroptions->{ssnvPrior} .
-" --shared-site-error-rate " . $useroptions->{ssnvNoise} .
-" --shared-site-error-strand-bias-fraction " . $useroptions->{ssnvNoiseStrandBiasFrac} .
-" --somatic-indel-rate " . $useroptions->{sindelPrior} .
-" --shared-indel-error-rate " . $useroptions->{sindelNoise} .
-" --tier2-min-single-align-score " . $useroptions->{minTier2Mapq} .
-" --tier2-min-paired-align-score " . $useroptions->{minTier2Mapq} .
-" --tier2-single-align-score-rescue-mode" .
-" --tier2-mismatch-density-filter-count 10" .
-" --tier2-no-filter-unanchored" .
-" --tier2-indel-nonsite-match-prob 0.25" .
-" --tier2-include-singleton" .
-" --tier2-include-anomalous";
-
-
-my $somSnvFile='somatic.snvs.unfiltered.vcf';
-my $somIndelFile='somatic.indels.unfiltered.vcf';
-
-my $cmd = "$strelkaBin $strelka_base_opts" .
-" -bam-file " . $normalBam .
-" --tumor-bam-file " . $tumorBam .
-" --somatic-snv-file " . File::Spec->catfile($binDir,$somSnvFile) .
-" --somatic-indel-file " . File::Spec->catfile($binDir,$somIndelFile) .
-" --variant-window-flank-file 50 " . File::Spec->catfile($binDir,$somIndelFile . '.window');
-
-
-sub ualignFile($) {
- return File::Spec->catfile($binDir,$_[0] . ".unsorted.realigned.bam");
-}
-sub alignFile($) {
- return File::Spec->catfile($binDir,$_[0] . ".realigned");
-}
-
-
-if(exists($useroptions->{maxInputDepth}) && ($useroptions->{maxInputDepth} > 0)) {
- $cmd .= " --max-input-depth " . $useroptions->{maxInputDepth};
-}
-
-
-if($isWriteRealignedBam) {
- $cmd .= " -realigned-read-file " . ualignFile("normal") .
- " --tumor-realigned-read-file " . ualignFile("tumor");
-}
-
-if(defined($useroptions->{extraStrelkaArguments})){
- my $arg=$useroptions->{extraStrelkaArguments};
- if($arg !~ /^\s*$/) {
- $cmd .= " " . $arg;
- }
-}
-
-# this file contains site stats that used to be printed on stdout:
-#
-$cmd .=
-" --report-file " . File::Spec->catfile($binDir,'strelka.stats');
-
-$cmd .=
-" >| " . File::Spec->catfile($binDir,'strelka.stdout') .
-" 2>| " . File::Spec->catfile($binDir,'strelka.stderr');
-
-
-executeCmd($cmd,0);
-
-
-if($isWriteRealignedBam) {
- for my $label (qw(normal tumor)) {
- my $ufile = ualignFile($label);
- if( -f $ufile ) {
- my $afile = alignFile($label);
- my $cmd = "samtools sort " . $ufile . " " . $afile;
- executeCmd($cmd,0);
- unlink($ufile);
- } else {
- logX("Can't find unsorted realigned BAM file: '$ufile'");
- }
- }
-}
-
-
-1;
-
-__END__
-
-
diff -r f48854499d41 -r 7c35d3efd1e7 strelka2/libexec/consolidateResults.pl
--- a/strelka2/libexec/consolidateResults.pl Thu Sep 25 12:02:14 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,276 +0,0 @@
-#!/usr/bin/env perl
-
-=head1 LICENSE
-
-Strelka Workflow Software
-Copyright (c) 2009-2013 Illumina, Inc.
-
-This software is provided under the terms and conditions of the
-Illumina Open Source Software License 1.
-
-You should have received a copy of the Illumina Open Source
-Software License 1 along with this program. If not, see
-.
-
-=head1 SYNOPSIS
-
-consolidateSomaticVariants.pl [options] | --help
-
-=head2 SUMMARY
-
-Aggregate final results from all chromosomes
-
-=cut
-
-use warnings FATAL => 'all';
-use strict;
-
-use Carp;
-$SIG{__DIE__} = \&Carp::confess;
-
-use File::Spec;
-use File::Temp;
-use Getopt::Long;
-use Pod::Usage;
-
-my $baseDir;
-my $libDir;
-#my $optDir;
-#my $vcftDir;
-BEGIN {
- my $thisDir=(File::Spec->splitpath($0))[1];
- $baseDir=File::Spec->catdir($thisDir,File::Spec->updir());
- $libDir=File::Spec->catdir($baseDir,'lib');
- #$optDir=File::Spec->catdir($baseDir,'opt');
- #$vcftDir=File::Spec->catdir($optDir,'vcftools','lib','perl5','site_perl');
-}
-use lib $libDir;
-use Utils;
-#use lib $vcftDir;
-use Vcf;
-
-if(getAbsPath($baseDir)) {
- errorX("Can't resolve path for strelka_workflow install directory: '$baseDir'");
-}
-#$optDir=File::Spec->catdir($baseDir,'opt');
-
-
-my $scriptName=(File::Spec->splitpath($0))[2];
-my $argCount=scalar(@ARGV);
-my $cmdline=join(' ',$0,@ARGV);
-
-
-my $configFile;
-my $help;
-
-GetOptions( "config=s" => \$configFile,
- "help|h" => \$help) or pod2usage(2);
-
-pod2usage(2) if($help);
-pod2usage(2) unless(defined($configFile));
-
-#
-# check fixed paths
-#
-#my $samtoolsBin = File::Spec->catfile($optDir,'samtools','samtools');
-#checkFile($samtoolsBin,"samtools binary");
-
-
-#
-# read config and validate values
-#
-checkFile($configFile,"configuration ini");
-my $config = parseConfigIni($configFile);
-
-
-for (qw(outDir chromOrder)) {
- errorX("Undefined configuration option: '$_'") unless(defined($config->{derived}{$_}));
-}
-for (qw(isWriteRealignedBam binSize)) {
- errorX("Undefined configuration option: '$_'") unless(defined($config->{user}{$_}));
-}
-
-my $userconfig = $config->{user};
-
-my @chromOrder = split(/\t/,$config->{derived}{chromOrder});
-for my $chrom (@chromOrder) {
- my $chromSizeKey = "chrom_" . $chrom . "_size";
- errorX("Undefined configuration option: '$_'") unless(defined($chromSizeKey));
-}
-
-my $outDir = $config->{derived}{outDir};
-checkDir($outDir,"output");
-
-
-my $isWriteRealignedBam = $userconfig->{isWriteRealignedBam};
-
-for my $chrom (@chromOrder) {
- my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom);
- checkDir($chromDir,"input chromosome");
-
- next unless($isWriteRealignedBam);
- my $chromSizeKey = "chrom_" . $chrom . "_size";
- my $binList = getBinList($config->{derived}{$chromSizeKey},$userconfig->{binSize});
- for my $binId (@$binList) {
- my $dir = File::Spec->catdir($chromDir,'bins',$binId);
- checkDir($dir,"input bin");
- }
-}
-
-
-
-# suffix used for large result file intermediates:
-my $itag = ".incomplete";
-
-
-#
-# concatenate vcfs:
-#
-sub concatenateVcfs($) {
- my $fileName = shift;
-
- my $is_first = 1;
-
- my $allFileName = "all." . $fileName;
- my $allFile = File::Spec->catfile($outDir,'results',$allFileName . $itag);
- open(my $aFH,'>',"$allFile")
- || errorX("Failed to open file: '$allFile'");
-
- # loop over all chroms once to create the header, and one more time for all the data:
- my $headervcf;
- for my $chrom (@chromOrder) {
- my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom);
- my $iFile = File::Spec->catfile($chromDir,$fileName);
- checkFile($iFile);
-
- my $depthKey="maxDepth_${chrom}";
-
- if($is_first) {
- open(my $iFH,'<',"$iFile")
- || errorX("Failed to open file: '$iFile'");
- $headervcf = Vcf->new(fh=>$iFH);
- $headervcf->parse_header();
- $headervcf->remove_header_line(key=>"cmdline");
- $headervcf->add_header_line({key=>"cmdline",value=>$cmdline});
- $headervcf->remove_header_line(key=>"$depthKey");
- close($iFH);
- $is_first=0;
- }
-
- {
- open(my $iFH,'<',"$iFile")
- || errorX("Failed to open file: '$iFile'");
- my $vcf = Vcf->new(fh=>$iFH);
- $vcf->parse_header();
- for my $line (@{$vcf->get_header_line(key=>"$depthKey")}) {
- # $line seems to be returned as a length 1 array ref to a hash -- ??!?!??!!
- $headervcf->add_header_line($line->[0]);
- }
- $vcf->close();
- close($iFH);
- }
- }
- print $aFH $headervcf->format_header();
- $headervcf->close();
-
- for my $chrom (@chromOrder) {
- my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom);
- my $iFile = File::Spec->catfile($chromDir,$fileName);
-
- open(my $iFH,'<',"$iFile")
- || errorX("Failed to open file: '$iFile'");
-
- my $vcf = Vcf->new(fh=>$iFH);
- $vcf->parse_header();
- print $aFH $_ while(<$iFH>);
- }
-
- close($aFH);
-
- # make a second set of files with only the passed variants:
- my $passedFileName = "passed." . $fileName;
- my $passedFile = File::Spec->catfile($outDir,'results',$passedFileName . $itag);
- open(my $pFH,'>',"$passedFile")
- || errorX("Failed to open file: '$passedFile'");
-
- open(my $arFH,'<',"$allFile")
- || errorX("Failed to open file: '$allFile'");
-
- while(<$arFH>) {
- chomp;
- unless(/^#/) {
- my @F = split(/\t/);
- next if((scalar(@F)>=7) && ($F[6] ne "PASS"));
- }
- print $pFH "$_\n";
- }
-
- close($arFH);
- close($pFH);
-
- my $allFileFinished = File::Spec->catfile($outDir,'results',$allFileName);
- checkMove($allFile,$allFileFinished);
-
- my $passedFileFinished = File::Spec->catfile($outDir,'results',$passedFileName);
- checkMove($passedFile,$passedFileFinished);
-}
-
-concatenateVcfs("somatic.snvs.vcf");
-concatenateVcfs("somatic.indels.vcf");
-
-
-my $bamSuffix = ".realigned.bam";
-
-sub consolidateBam($) {
- my $label = shift;
-
- my $fileName = $label . $bamSuffix;
-
- my $reDir = File::Spec->catdir($outDir,'realigned');
- checkMakeDir($reDir);
-
- my @bamList;
- for my $chrom (@chromOrder) {
- my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom);
-
- my $chromSizeKey = "chrom_" . $chrom . "_size";
- my $binList = getBinList($config->{derived}{$chromSizeKey},$userconfig->{binSize});
- for my $binId (@$binList) {
- my $binDir = File::Spec->catdir($chromDir,'bins',$binId);
- my $rbamFile = File::Spec->catfile($binDir,$fileName);
- checkFile($rbamFile,"bin realigned bam file");
-
- push @bamList,$rbamFile;
- }
- }
-
- return unless(scalar(@bamList));
-
- my $headerFH = File::Temp->new();
- my $getHeaderCmd = "bash -c '$samtoolsBin view -H ".$bamList[0]." > $headerFH'";
- executeCmd($getHeaderCmd);
-
- my $allFile = File::Spec->catfile($reDir,$fileName . $itag);
- my $cmd="$samtoolsBin merge -h $headerFH $allFile ". join(" ",@bamList);
- executeCmd($cmd);
-
- my $allFileFinished = File::Spec->catfile($reDir,$fileName);
- checkMove($allFile,$allFileFinished);
-
- my $indexCmd="$samtoolsBin index $allFileFinished";
- executeCmd($indexCmd);
-
- # for now don't remove all the bin realignments...
- # unlink(@bamList);
-}
-
-if($isWriteRealignedBam) {
- consolidateBam("normal");
- consolidateBam("tumor");
-}
-
-
-1;
-
-__END__
-
diff -r f48854499d41 -r 7c35d3efd1e7 strelka2/libexec/countFastaBases
Binary file strelka2/libexec/countFastaBases has changed
diff -r f48854499d41 -r 7c35d3efd1e7 strelka2/libexec/filterSomaticVariants.pl
--- a/strelka2/libexec/filterSomaticVariants.pl Thu Sep 25 12:02:14 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,435 +0,0 @@
-#!/usr/bin/env perl
-
-=head1 LICENSE
-
-Strelka Workflow Software
-Copyright (c) 2009-2013 Illumina, Inc.
-
-This software is provided under the terms and conditions of the
-Illumina Open Source Software License 1.
-
-You should have received a copy of the Illumina Open Source
-Software License 1 along with this program. If not, see
-.
-
-=head1 SYNOPSIS
-
-filterSomaticVariants.pl [options] | --help
-
-=head2 SUMMARY
-
-Aggregate and filter the variant caller results by chromosome
-
-=cut
-
-use warnings FATAL => 'all';
-use strict;
-
-use Carp;
-$SIG{__DIE__} = \&Carp::confess;
-
-use File::Spec;
-use Getopt::Long;
-use Pod::Usage;
-
-my $baseDir;
-my $libDir;
-#my $vcftDir;
-BEGIN {
- my $thisDir=(File::Spec->splitpath($0))[1];
- $baseDir=File::Spec->catdir($thisDir,File::Spec->updir());
- $libDir=File::Spec->catdir($baseDir,'lib');
- #my $optDir=File::Spec->catdir($baseDir,'opt');
- #$vcftDir=File::Spec->catdir($optDir,'vcftools','lib','perl5','site_perl');
-}
-use lib $libDir;
-use Utils;
-#use lib $vcftDir;
-#use Vcf;
-
-
-my $scriptName=(File::Spec->splitpath($0))[2];
-my $argCount=scalar(@ARGV);
-my $cmdline = join(' ',$0,@ARGV);
-
-
-my ($chrom, $configFile);
-my $help;
-
-GetOptions( "chrom=s" => \$chrom,
- "config=s" => \$configFile,
- "help|h" => \$help) or pod2usage(2);
-
-pod2usage(2) if($help);
-pod2usage(2) unless(defined($chrom));
-pod2usage(2) unless(defined($configFile));
-
-
-
-#
-# read config and validate values
-#
-checkFile($configFile,"configuration ini");
-my $config = parseConfigIni($configFile);
-
-my $chromSizeKey = "chrom_" . $chrom . "_size";
-my $chromKnownSizeKey = "chrom_" . $chrom . "_knownSize";
-
-for (("outDir", $chromSizeKey, $chromKnownSizeKey)) {
- errorX("Undefined configuration option: '$_'") unless(defined($config->{derived}{$_}));
-}
-for (qw(binSize ssnvQuality_LowerBound sindelQuality_LowerBound
- isSkipDepthFilters depthFilterMultiple
- snvMaxFilteredBasecallFrac snvMaxSpanningDeletionFrac
- indelMaxRefRepeat indelMaxWindowFilteredBasecallFrac
- indelMaxIntHpolLength)) {
- errorX("Undefined configuration option: '$_'") unless(defined($config->{user}{$_}));
-}
-
-my $outDir = $config->{derived}{outDir};
-my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom);
-checkDir($outDir,"output");
-checkDir($chromDir,"output chromosome");
-
-my $userconfig = $config->{user};
-
-my $binList = getBinList($config->{derived}{$chromSizeKey},$userconfig->{binSize});
-for my $binId (@$binList) {
- my $dir = File::Spec->catdir($chromDir,'bins',$binId);
- checkDir($dir,"input bin");
-}
-
-#
-# parameters from user config file:
-#
-
-# minimum passed ssnv_nt Q-score:
-my $minQSSNT=$userconfig->{ssnvQuality_LowerBound};
-# minimum passed sindel_nt Q-score:
-my $minQSINT=$userconfig->{sindelQuality_LowerBound};
-
-#
-# filtration parameters from user config file:
-#
-
-# skip depth filters for targeted resequencing:
-my $isUseDepthFilter=(! $userconfig->{isSkipDepthFilters});
-# multiple of the normal mean coverage to filter snvs and indels
-my $depthFilterMultiple=$userconfig->{depthFilterMultiple};
-# max filtered basecall fraction for any sample
-my $snvMaxFilteredBasecallFrac=$userconfig->{snvMaxFilteredBasecallFrac};
-# max snv spanning-deletion fraction for any sample
-my $snvMaxSpanningDeletionFrac=$userconfig->{snvMaxSpanningDeletionFrac};
-# max indel reference repeat length
-my $indelMaxRefRepeat=$userconfig->{indelMaxRefRepeat};
-# max indel window filter fraction
-my $indelMaxWindowFilteredBasecallFrac=$userconfig->{indelMaxWindowFilteredBasecallFrac};
-# max indel interupted hompolymer length:
-my $indelMaxIntHpolLength=$userconfig->{indelMaxIntHpolLength};
-
-
-# first we want the normal sample mean chromosome depth:
-#
-my $filterCoverage;
-if($isUseDepthFilter) {
- my $totalCoverage = 0;
- for my $binId (@$binList) {
- my $sFile = File::Spec->catfile($chromDir,'bins',$binId,'strelka.stats');
- checkFile($sFile,"strelka bin stats");
- open(my $sFH, '<', $sFile)
- || errorX("Can't open file: '$sFile' $!");
-
- my $is_found=0;
- while(<$sFH>) {
- next unless(/^NORMAL_NO_REF_N_COVERAGE\s/);
- my $is_bad = 0;
- if(not /sample_size:\s*(\d+)\s+/) { $is_bad=1; }
- my $ss=$1;
- # leave the regex for mean fairly loose to pick up scientific notation, etc..
- if(not /mean:\s*(\d[^\s]*|nan)\s+/) { $is_bad=1; }
- my $mean=$1;
- errorX("Unexpected format in file: '$sFile'") if($is_bad);
-
- my $coverage = ( $ss==0 ? 0 : int($ss*$mean) );
-
- $totalCoverage += $coverage;
- $is_found=1;
- last;
- }
- close($sFH);
- errorX("Unexpected format in file: '$sFile'") unless($is_found);
- }
-
- my $chromKnownSize = $config->{derived}{$chromKnownSizeKey};
- my $normalMeanCoverage = ($totalCoverage/$chromKnownSize);
- $filterCoverage = $normalMeanCoverage*$depthFilterMultiple;
-}
-
-
-# add filter description to vcf header unless it already exists
-# return 1 if filter id already exists, client can decide if this is an error
-#
-sub add_vcf_filter($$$) {
- my ($vcf,$id,$desc) = @_;
- return 1 if(scalar(@{$vcf->get_header_line(key=>'FILTER', ID=>$id)}));
- $vcf->add_header_line({key=>'FILTER', ID=>$id,Description=>$desc});
- return 0;
-}
-
-
-sub check_vcf_for_sample($$$) {
- my ($vcf,$sample,$file) = @_;
- my $is_found=0;
- for ($vcf->get_samples()) {
- $is_found=1 if($_ eq $sample);
- }
- errorX("Failed to find sample '$sample' in vcf file '$file'") unless($is_found);
-}
-
-
-
-my $depthFiltId="DP";
-
-
-# Runs all post-call vcf filters:
-#
-sub filterSnvFileList(\@$$$) {
- my ($ifiles,$depthFilterVal,$acceptFileName,$isUseDepthFilter) = @_;
-
- my $baseFiltId="BCNoise";
- my $spanFiltId="SpanDel";
- my $qFiltId="QSS_ref";
-
- open(my $aFH,'>',$acceptFileName)
- or errorX("Failed to open file: '$acceptFileName'");
-
- my $is_first=1;
- for my $ifile (@$ifiles) {
- open(my $iFH,'<',"$ifile")
- or errorX("Failed to open file: '$ifile'");
- my $vcf = Vcf->new(fh=>$iFH);
-
- # run some simple header validation on each vcf:
- $vcf->parse_header();
- check_vcf_for_sample($vcf,'NORMAL',$ifile);
- check_vcf_for_sample($vcf,'TUMOR',$ifile);
-
- if($is_first) {
- # TODO: update vcf meta-data for chromosome-level filtered files
- #
- $vcf->remove_header_line(key=>"cmdline");
- $vcf->add_header_line({key=>"cmdline",value=>$cmdline});
- if($isUseDepthFilter) {
- $vcf->add_header_line({key=>"maxDepth_$chrom",value=>$depthFilterVal});
- add_vcf_filter($vcf,$depthFiltId,"Greater than ${depthFilterMultiple}x chromosomal mean depth in Normal sample");
- }
- add_vcf_filter($vcf,$baseFiltId,"Fraction of basecalls filtered at this site in either sample is at or above $snvMaxFilteredBasecallFrac");
- add_vcf_filter($vcf,$spanFiltId,"Fraction of reads crossing site with spanning deletions in either sample exceeeds $snvMaxSpanningDeletionFrac");
- add_vcf_filter($vcf,$qFiltId,"Normal sample is not homozygous ref or ssnv Q-score < $minQSSNT, ie calls with NT!=ref or QSS_NT < $minQSSNT");
- print $aFH $vcf->format_header();
- $is_first=0;
- }
-
- while(my $x=$vcf->next_data_hash()) {
- my $norm=$x->{gtypes}->{NORMAL};
- my $tumr=$x->{gtypes}->{TUMOR};
-
- my %filters;
-
- # normal depth filter:
- my $normalDP=$norm->{DP};
- if($isUseDepthFilter) {
- $filters{$depthFiltId} = ($normalDP > $depthFilterVal);
- }
-
- # filtered basecall fraction:
- my $normal_filt=($normalDP>0 ? $norm->{FDP}/$normalDP : 0);
-
- my $tumorDP=$tumr->{DP};
- my $tumor_filt=($tumorDP>0 ? $tumr->{FDP}/$tumorDP : 0);
-
- $filters{$baseFiltId}=(($normal_filt >= $snvMaxFilteredBasecallFrac) or
- ($tumor_filt >= $snvMaxFilteredBasecallFrac));
-
- # spanning deletion fraction:
- my $normalSDP=$norm->{SDP};
- my $normalSpanTot=($normalDP+$normalSDP);
- my $normalSpanDelFrac=($normalSpanTot>0 ? ($normalSDP/$normalSpanTot) : 0);
-
- my $tumorSDP=$tumr->{SDP};
- my $tumorSpanTot=($tumorDP+$tumorSDP);
- my $tumorSpanDelFrac=($tumorSpanTot>0 ? ($tumorSDP/$tumorSpanTot) : 0);
-
- $filters{$spanFiltId}=(($normalSpanDelFrac > $snvMaxSpanningDeletionFrac) or
- ($tumorSpanDelFrac > $snvMaxSpanningDeletionFrac));
-
- # Q-val filter:
- $filters{$qFiltId}=(($x->{INFO}->{NT} ne "ref") or
- ($x->{INFO}->{QSS_NT} < $minQSSNT));
-
- $x->{FILTER} = $vcf->add_filter($x->{FILTER},%filters);
-
- print $aFH $vcf->format_line($x);
- }
-
- $vcf->close();
- close($iFH);
- }
-
- close($aFH);
-}
-
-
-
-sub updateA2(\@$) {
- my ($a2,$FH) = @_;
- my $line=<$FH>;
- unless(defined($line)) { errorX("Unexpected end of somatic indel window file"); }
- chomp $line;
- @$a2 = split("\t",$line);
- unless(scalar(@$a2)) { errorX("Unexpected format in somatic indel window file"); }
-}
-
-
-
-sub filterIndelFileList(\@$$$) {
- my ($ifiles,$depthFilterVal,$acceptFileName,$isUseDepthFilter) = @_;
-
- my $repeatFiltId="Repeat";
- my $iHpolFiltId="iHpol";
- my $baseFiltId="BCNoise";
- my $qFiltId="QSI_ref";
-
- open(my $aFH,'>',$acceptFileName)
- or errorX("Failed to open file: '$acceptFileName'");
-
- my $is_first=1;
- for my $ifile (@$ifiles) {
- open(my $iFH,'<',"$ifile")
- or errorX("Failed to open somatic indel file: '$ifile'");
-
- my $iwfile = $ifile . ".window";
- open(my $iwFH,'<',"$iwfile")
- or errorX("Failed to open somatic indel window file: '$iwfile'");
-
- my @a2; # hold window file data for one line in case we overstep...
-
- my $vcf = Vcf->new(fh=>$iFH);
-
- # run some simple header validation on each vcf:
- $vcf->parse_header();
- check_vcf_for_sample($vcf,'NORMAL',$ifile);
- check_vcf_for_sample($vcf,'TUMOR',$ifile);
-
- if($is_first) {
- # TODO: update all vcf meta-data for chromosome-level filtered files
- #
- $vcf->remove_header_line(key=>"cmdline");
- $vcf->add_header_line({key=>"cmdline",value=>$cmdline});
- if($isUseDepthFilter) {
- $vcf->add_header_line({key=>"maxDepth_$chrom",value=>$depthFilterVal});
- }
- $vcf->add_header_line({key=>'FORMAT', ID=>'DP50',Number=>1,Type=>'Float',Description=>'Average tier1 read depth within 50 bases'});
- $vcf->add_header_line({key=>'FORMAT', ID=>'FDP50',Number=>1,Type=>'Float',Description=>'Average tier1 number of basecalls filtered from original read depth within 50 bases'});
- $vcf->add_header_line({key=>'FORMAT', ID=>'SUBDP50',Number=>1,Type=>'Float',Description=>'Average number of reads below tier1 mapping quality threshold aligned across sites within 50 bases'});
- if($isUseDepthFilter) {
- add_vcf_filter($vcf,$depthFiltId,"Greater than ${depthFilterMultiple}x chromosomal mean depth in Normal sample");
- }
- add_vcf_filter($vcf,$repeatFiltId,"Sequence repeat of more than ${indelMaxRefRepeat}x in the reference sequence");
- add_vcf_filter($vcf,$iHpolFiltId,"Indel overlaps an interupted homopolymer longer than ${indelMaxIntHpolLength}x in the reference sequence");
- add_vcf_filter($vcf,$baseFiltId,"Average fraction of filtered basecalls within 50 bases of the indel exceeds $indelMaxWindowFilteredBasecallFrac");
- add_vcf_filter($vcf,$qFiltId,"Normal sample is not homozygous ref or sindel Q-score < $minQSINT, ie calls with NT!=ref or QSI_NT < $minQSINT");
- print $aFH $vcf->format_header();
- $is_first=0;
- }
-
- while(my $x=$vcf->next_data_hash()) {
- $vcf->add_format_field($x,'DP50');
- $vcf->add_format_field($x,'FDP50');
- $vcf->add_format_field($x,'SUBDP50');
-
- my $norm=$x->{gtypes}->{NORMAL};
- my $tumr=$x->{gtypes}->{TUMOR};
-
- my $chrom=$x->{CHROM};
- my $pos=int($x->{POS});
-
- # get matching line from window file:
- while((scalar(@a2)<2) or
- (($a2[0] le $chrom) and (int($a2[1]) < $pos))) {
- updateA2(@a2,$iwFH);
- }
- unless(scalar(@a2) and ($a2[0] eq $chrom) and (int($a2[1]) == $pos))
- { errorX("Can't find somatic indel window position.\nIndel line: " . $vcf->format_line($x) ); }
-
- # add window data to vcf record:
- #
- $norm->{DP50} = $a2[2]+$a2[3];
- $norm->{FDP50} = $a2[3];
- $norm->{SUBDP50} = $a2[4];
- $tumr->{DP50} = $a2[5]+$a2[6];
- $tumr->{FDP50} = $a2[6];
- $tumr->{SUBDP50} = $a2[7];
-
- my %filters;
-
- # normal depth filter:
- my $normalDP=$norm->{DP};
- if($isUseDepthFilter) {
- $filters{$depthFiltId}=($normalDP > $depthFilterVal);
- }
-
- # ref repeat
- my $refRep=$x->{INFO}->{RC};
- $filters{$repeatFiltId}=(defined($refRep) and
- ($refRep > $indelMaxRefRepeat));
-
- # ihpol
- my $iHpol=$x->{INFO}->{IHP};
- $filters{$iHpolFiltId}=(defined($iHpol) and
- ($iHpol > $indelMaxIntHpolLength));
-
- # base filt:
- my $normWinFrac=( $norm->{DP50} ? $norm->{FDP50}/$norm->{DP50} : 0 );
- my $tumrWinFrac=( $tumr->{DP50} ? $tumr->{FDP50}/$tumr->{DP50} : 0 );
- $filters{$baseFiltId}=( ($normWinFrac >= $indelMaxWindowFilteredBasecallFrac) or
- ($tumrWinFrac >= $indelMaxWindowFilteredBasecallFrac) );
-
- # Q-val filter:
- $filters{$qFiltId}=(($x->{INFO}->{NT} ne "ref") or
- ($x->{INFO}->{QSI_NT} < $minQSINT));
-
- $x->{FILTER} = $vcf->add_filter($x->{FILTER},%filters);
-
- print $aFH $vcf->format_line($x);
- }
-
- $vcf->close();
- close($iFH);
- close($iwFH);
- }
-
- close($aFH);
-}
-
-
-
-my @ssnvFiles;
-my @sindelFiles;
-for my $binId (@$binList) {
- my $ssnvFile = File::Spec->catfile($chromDir,'bins',$binId,'somatic.snvs.unfiltered.vcf');
- my $sindelFile = File::Spec->catfile($chromDir,'bins',$binId,'somatic.indels.unfiltered.vcf');
- checkFile($ssnvFile,"bin snv file");
- checkFile($sindelFile,"bin indel file");
- push @ssnvFiles,$ssnvFile;
- push @sindelFiles,$sindelFile;
-}
-
-my $ssnvOutFile = File::Spec->catfile($chromDir,"somatic.snvs.vcf");
-filterSnvFileList(@ssnvFiles,$filterCoverage,$ssnvOutFile,$isUseDepthFilter);
-
-my $sindelOutFile = File::Spec->catfile($chromDir,"somatic.indels.vcf");
-filterIndelFileList(@sindelFiles,$filterCoverage,$sindelOutFile,$isUseDepthFilter);
-
-
-1;
diff -r f48854499d41 -r 7c35d3efd1e7 strelka2/libexec/strelka2
Binary file strelka2/libexec/strelka2 has changed
diff -r f48854499d41 -r 7c35d3efd1e7 strelka2/strelka2.xml
--- a/strelka2/strelka2.xml Thu Sep 25 12:02:14 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,126 +0,0 @@
-
-
- Strelka good interface but no dependencies
- strelka_wrapper.py --tumorBam $tumorBam --normalBam $normalBam --refFile $refFile
- #if $configuration.configuration_switch == 'Default':
- --configFile Default
- #else if $configuration.configuration_switch == 'Path':
- --configFile $configuration.configFile
- #else:
- --configFile Custom
- --depthFilterMultiple $configuration.depthFilterMultiple
- --snvMaxFilteredBasecallFrac $configuration.snvMaxFilteredBasecallFrac
- --snvMaxSpanningDeletionFrac $configuration.snvMaxSpanningDeletionFrac
- --indelMaxRefRepeat $configuration.indelMaxRefRepeat
- --indelMaxWindowFilteredBasecallFrac $configuration.indelMaxWindowFilteredBasecallFrac
- --indelMaxIntHpolLength $configuration.indelMaxIntHpolLength
- --ssnvPrior $configuration.ssnvPrior
- --sindelPrior $configuration.sindelPrior
- --ssnvNoise $configuration.ssnvNoise
- --sindelNoise $configuration.sindelNoise
- --ssnvNoiseStrandBiasFrac $configuration.ssnvNoiseStrandBiasFrac
- --minTier1Mapq $configuration.minTier1Mapq
- --minTier2Mapq $configuration.minTier2Mapq
- --ssnvQuality_LowerBound $configuration.ssnvQuality_LowerBound
- --sindelQuality_LowerBound $configuration.sindelQuality_LowerBound
- --isWriteRealignedBam $configuration.isWriteRealignedBam
- --binSize $configuration.binSize
- --isSkipDepthFilters $configuration.isSkipDepthFilters
- --maxInputDepth $configuration.maxInputDepth
- #if $configuration.extra_arguments.extra_arguments_switch == 'Yes':
- --extraStrelkaArguments $configuration.extra_arguments.extraStrelkaArguments
- #end if
- #end if
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- conf_file.conf_file_switch == "Yes"
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-Strelka, a method for somatic SNV and small indel detectipon from sequencing data of matched tumor-normal samples.
-
-
-
diff -r f48854499d41 -r 7c35d3efd1e7 strelka2/strelka_config.sample
--- a/strelka2/strelka_config.sample Thu Sep 25 12:02:14 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,139 +0,0 @@
-
-;
-; User configuration options for Strelka somatic small-variant caller
-; workflow:
-;
-
-[user]
-
-;
-; isSkipDepthFilters should be set to 1 to skip depth filtration for
-; whole exome or other targeted sequencing data
-;
-isSkipDepthFilters = 1
-
-;
-; strelka will not accept input reads above this depth (they will be skipped
-; until the depth drops below this value). Set this value <= 0 to disable
-; this feature. Using this filter will bound memory usage given extremely high
-; depth input, but may be problematic in high-depth targeted sequencing
-; applications.
-;
-maxInputDepth = 10000
-
-;
-; If the depth filter is not skipped, all variants which occur at a
-; depth greater than depthFilterMultiple*chromosome mean depth will be
-; filtered out.
-;
-depthFilterMultiple = 3.0
-
-;
-; Somatic SNV calls are filtered at sites where greater than this
-; fraction of basecalls have been removed by the mismatch density
-; filter in either sample.
-;
-snvMaxFilteredBasecallFrac = 0.4
-
-;
-; Somatic SNV calls are filtered at sites where greater than this
-; fraction of overlapping reads contain deletions which span the SNV
-; call site.
-;
-snvMaxSpanningDeletionFrac = 0.75
-
-;
-; Somatic indel calls are filtered if they represent an expansion or
-; contraction of a repeated pattern with a repeat count greater than
-; indelMaxRefRepeat in the reference (ie. if indelMaxRefRepeat is 8,
-; then the indel is filtered when it is an expansion/contraction of a
-; homopolymer longer than 8 bases, a dinucleotide repeat longer than
-; 16 bases, etc.)
-;
-indelMaxRefRepeat = 8
-
-;
-; Somatic indel calls are filtered if greater than this fraction of
-; basecalls in a window extending 50 bases to each side of an indel's
-; call position have been removed by the mismatch density filter.
-;
-indelMaxWindowFilteredBasecallFrac = 0.3
-
-;
-; Somatic indels are filtered if they overlap ’interrupted
-; homopolymers’ greater than this length. The term 'interrupted
-; homopolymer' is used to indicate the longest homopolymer which can
-; be found intersecting or adjacent to the called indel when a single
-; non-homopolymer base is allowed.
-;
-indelMaxIntHpolLength = 14
-
-;
-; prior probability of a somatic snv or indel
-;
-ssnvPrior = 0.000001
-sindelPrior = 0.000001
-
-;
-; probability of an snv or indel noise allele
-;
-; NB: in the calling model a noise allele is shared in tumor and
-; normal samples, but occurs at any frequency.
-;
-ssnvNoise = 0.0000005
-sindelNoise = 0.000001
-
-;
-; Fraction of snv noise attributed to strand-bias.
-;
-; It is not recommended to change this setting. However, if it is
-; essential to turn the strand bias penalization off, the following is
-; recommended:
-; Assuming the current value of ssnvNoiseStrandBiasFrac is 0.5,
-; (1) set ssnvNoiseStrandBiasFrac = 0
-; (2) divide the current ssnvNoise value by 2
-;
-ssnvNoiseStrandBiasFrac = 0.5
-
-;
-; minimum MAPQ score for PE reads at tier1:
-;
-minTier1Mapq = 20
-
-;
-; minimum MAPQ score for PE and SE reads at tier2:
-;
-minTier2Mapq = 5
-
-;
-; Somatic quality score (QSS_NT, NT=ref) below which somatic SNVs are
-; marked as filtered:
-;
-ssnvQuality_LowerBound = 15
-
-;
-; Somatic quality score (QSI_NT, NT=ref) below which somatic indels
-; are marked as filtered:
-;
-sindelQuality_LowerBound = 30
-
-;
-; Optionally write out read alignments which were altered during the
-; realignment step. At the completion of the workflow run, the
-; realigned reads can be found in:
-;
-; ${ANALYSIS_DIR}/realigned/{normal,tumor}.realigned.bam
-;
-isWriteRealignedBam = 0
-
-;
-; Jobs are parallelized over segments of the reference genome no larger
-; than this size:
-;
-binSize = 25000000
-
-;
-; Additional arguments passed to strelka.
-;
-extraStrelkaArguments =
-
diff -r f48854499d41 -r 7c35d3efd1e7 strelka2/strelka_wrapper.py
--- a/strelka2/strelka_wrapper.py Thu Sep 25 12:02:14 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,201 +0,0 @@
-#!/usr/bin/env python
-#Dan Blankenberg
-
-"""
-A wrapper script for running the GenomeAnalysisTK.jar commands.
-"""
-
-from __future__ import print_function
-import sys, argparse, os, tempfile, subprocess, shutil
-from binascii import unhexlify
-from string import Template
-from galaxy import eggs
-#import pkg_resources; pkg_resources.require( "bx-python" )
-
-#GALAXY_EXT_TO_GATK_EXT = { 'gatk_interval':'intervals', 'bam_index':'bam.bai', 'gatk_dbsnp':'dbSNP', 'picard_interval_list':'interval_list' } #items not listed here will use the galaxy extension as-is
-#GALAXY_EXT_TO_GATK_FILE_TYPE = GALAXY_EXT_TO_GATK_EXT #for now, these are the same, but could be different if needed
-#DEFAULT_GATK_PREFIX = "gatk_file"
-#CHUNK_SIZE = 2**20 #1mb
-#
-#
-def cleanup_before_exit( tmp_dir ):
- if tmp_dir and os.path.exists( tmp_dir ):
- shutil.rmtree( tmp_dir )
-
-def _create_config(args, config_path):
- conf_file = open(config_path, "w")
- conf_file.write("[user]\n")
- for option in args:
- if not option in ["tumorBam", "normalBam", "refFile", "configFile"] and args[option]!=None:
- conf_file.write("%s=%s\n" % (option, args[option]))
- conf_file.close()
-
-def my_Popen(cmd, prefix_for_stderr_name, tmp_dir, msg_error):
- stderr_name = tempfile.NamedTemporaryFile( prefix = prefix_for_stderr_name ).name
- proc = subprocess.Popen( args=cmd, shell=True, stderr=open( stderr_name, 'wb' ) )
- return_code = proc.wait()
- if return_code:
- for line in open( stderr_name ):
- print(line, file=sys.stderr)
- os.unlink( stderr_name ) #clean up
- cleanup_before_exit( tmp_dir )
- raise Exception( msg_error )
- else:
- os.unlink( stderr_name )
-
-def index_bam_files( bam_filenames, tmp_dir ):
- for bam_filename in bam_filenames:
- bam_index_filename = "%s.bai" % bam_filename
- print("bam_filename is: " + bam_filename + " bam_index_filename is: " + bam_index_filename + " test is: %s" % os.path.exists(bam_index_filename))
- if not os.path.exists( bam_index_filename ):
- #need to index this bam file
- command = 'samtools index %s %s' % ( bam_filename, bam_index_filename )
- my_Popen( command, "bam_index_stderr", tmp_dir, "Error during indexation of fasta file :" + bam_filename)
-
-def index_fasta_files( fasta_filenames, tmp_dir ):
- for fasta_filename in fasta_filenames:
- fasta_index_filename = "%s.fai" % fasta_filename
- print("fasta_filename is: " + fasta_filename + " fasta_index_filename is: " + fasta_index_filename + " test is: %s" % os.path.exists(fasta_index_filename))
- if not os.path.exists( fasta_index_filename ):
- #need to index this bam file
- command = 'samtools faidx %s %s' % ( fasta_filename, fasta_index_filename )
- my_Popen( command, "fasta_index_stderr", tmp_dir, "Error during indexation of fasta file :" + fasta_filename)
-
-def __main__():
- #Parse Command Line OPTPARSE DEPRECIATED USE ARGPARSE INSTEAD
- #MKTEMP DEPRECIATED USE MKDTlizations#EMP INSTEAD
-
- root_dir= "/home/galaxyusr/data/galaxy_dist/tools/strelka2"
- expected_dir="for_tests"
- job_dir=os.getcwd()
- analysis_dir=job_dir + "/StrelkaAnalysis"
- config_script=root_dir + "/configureStrelkaWorkflow.pl"
- tmp_dir = "tmp" #tempfile.mkdtemp( prefix='tmp-strelkaAnalysis-' )
- config_ini = "%s/config.ini" % (tmp_dir)
-
- print("root_dir: " + root_dir + "\njob_dir :" + job_dir + "\nanalysis_dir :" + analysis_dir + "\nconfig_script :" + config_script + "\ntmp_dir :" + tmp_dir + "\nconfig_ini :" + config_ini)
-
- #manage parsing
- parser = argparse.ArgumentParser()
- parser.add_argument( '-t', '--tumorBam', help='path to tumor bam file', required = False )
- parser.add_argument( '-n', '--normalBam', help='path to tumor bam file', required = False )
- parser.add_argument( '-r', '--refFile', help='path to tumor bam file', required = False )
- parser.add_argument( '-c', '--configFile', help='path to tumor bam file', required = False )
- parser.add_argument( '--depthFilterMultiple', help='path to tumor bam file', required = False )
- parser.add_argument( '--snvMaxFilteredBasecallFrac', help='path to tumor bam file', required = False )
- parser.add_argument( '--snvMaxSpanningDeletionFrac', help='path to tumor bam file', required = False )
- parser.add_argument( '--indelMaxRefRepeat', help='path to tumor bam file', required = False )
- parser.add_argument( '--indelMaxWindowFilteredBasecallFrac', help='path to tumor bam file', required = False )
- parser.add_argument( '--indelMaxIntHpolLength', help='path to tumor bam file', required = False )
- parser.add_argument( '--ssnvPrior', help='path to tumor bam file', required = False )
- parser.add_argument( '--sindelPrior', help='path to tumor bam file', required = False )
- parser.add_argument( '--ssnvNoise', help='path to tumor bam file', required = False )
- parser.add_argument( '--sindelNoise', help='path to tumor bam file', required = False )
- parser.add_argument( '--ssnvNoiseStrandBiasFrac', help='path to tumor bam file', required = False )
- parser.add_argument( '--minTier1Mapq', help='path to tumor bam file', required = False )
- parser.add_argument( '--minTier2Mapq', help='path to tumor bam file', required = False )
- parser.add_argument( '--ssnvQuality_LowerBound', help='path to tumor bam file', required = False )
- parser.add_argument( '--sindelQuality_LowerBound', help='path to tumor bam file', required = False )
- parser.add_argument( '--isWriteRealignedBam', help='path to tumor bam file', required = False )
- parser.add_argument( '--binSize', help='path to tumor bam file', required = False )
- parser.add_argument( '--extraStrelkaArguments', help='path to tumor bam file', required = False )
- parser.add_argument( '--isSkipDepthFilters', help='path to tumor bam file', required = False )
- parser.add_argument( '--maxInputDepth', help='path to tumor bam file', required = False )
- args = parser.parse_args()
-
- #verifying eveything's ok
- if not os.path.isfile(config_script):
- sys.exit("ERROR: The strelka workflow must be built prior to running. See installation instructions in '$root_dir/README'")
- print("configuring...", file=sys.stdout)
- if os.path.exists(analysis_dir):
- sys.exit("'" + analysis_dir + "' already exist, if you are executing this tool from galaxy it should not happen")
-
-
- # creating index if needed
- os.environ['PATH']= root_dir + "/opt/samtools:" + os.environ['PATH']
- bam_filenames = [ args.tumorBam, args.normalBam ]
- index_bam_files( bam_filenames, tmp_dir )
- fasta_files = [ args.refFile ]
- index_fasta_files( fasta_files, tmp_dir )
-
- #creating config file if needed
- if args.configFile == "Custom":
- _create_config(vars(args), config_ini)
- elif args.configFile == "Default":
- cmdbash="cp %s %s" % (root_dir + "/strelka_config.sample", config_ini)
- my_Popen(cmdbash, "copy_default_file_err", tmp_dir, "Error during the copy of default config file, maybe it was removed")
- else:
- if not os.path.exists(args.configFile):
- print( "The path to your configuration File seems to be wrong, use another one or custom option", file=sys.stderr)
- cmdbash="cp %s %s" % (args.configFile, config_ini)
- my_Popen(cmdbash, "copy_default_file_err", tmp_dir, "Error during the copy of default config file, maybe it was removed")
-
-
-
-
- #configuration of workflow
- cmd="%s --tumor=%s --normal=%s --ref=%s --config=%s --output-dir=%s" % (config_script, args.tumorBam, args.normalBam, args.refFile, config_ini, analysis_dir)
- print( "**** Starting configuration.")
- print( "**** Configuration cmd: '" + cmd + "'")
- my_Popen( cmd, "cinfugation_stderr", tmp_dir, "Error during configuration !")
- print("completed configuration")
-
- #run the workflow !
- cmd="make -C " + analysis_dir
- print("**** starting workflow.")
- print("**** workflow cmd: '" + cmd + "'")
- my_Popen( cmd, "workflow_stderr", tmp_dir, "Error during workflow execution !")
- print("**** completed workflow execution")
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-#bam_filenames = []
-# if options.datasets:
-# for ( dataset_arg, filename, galaxy_ext, prefix ) in options.datasets:
-# gatk_filename = filename_from_galaxy( filename, galaxy_ext, target_dir = tmp_dir, prefix = prefix )#return the link to the dataset that has been created in the function
-# if dataset_arg:
-# cmd = '%s %s "%s"' % ( cmd, gatk_filetype_argument_substitution( dataset_arg, galaxy_ext ), gatk_filename )
-# if galaxy_ext == "bam":
-# bam_filenames.append( gatk_filename )
-# #set up stdout and stderr output options
-# stdout = open_file_from_option( options.stdout, mode = 'wb' )
-# stderr = open_file_from_option( options.stderr, mode = 'wb' )
-# #if no stderr file is specified, we'll use our own
-# if stderr is None:
-# stderr = tempfile.NamedTemporaryFile( prefix="strelka-stderr-", dir=tmp_dir )
-#
-# proc = subprocess.Popen( args=cmd, stdout=stdout, stderr=stderr, shell=True, cwd=tmp_dir )
-# return_code = proc.wait()
-#
-# if return_code:
-# stderr_target = sys.stderr
-# else:
-# stderr_target = sys.stdout
-# stderr.flush()
-# stderr.seek(0)
-# while True:
-# chunk = stderr.read( CHUNK_SIZE )
-# if chunk:
-# stderr_target.write( chunk )
-# else:
-# break
-# stderr.close()
-# #generate html reports
-# if options.html_report_from_directory:
-# for ( html_filename, html_dir ) in options.html_report_from_directory:
-# html_report_from_directory( open( html_filename, 'wb' ), html_dir )
-#
-# cleanup_before_exit( tmp_dir )
-
-if __name__=="__main__": __main__()
diff -r f48854499d41 -r 7c35d3efd1e7 strelka2/tool_dependencies.xml
--- a/strelka2/tool_dependencies.xml Thu Sep 25 12:02:14 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,9 +0,0 @@
-
-
-
-
-
-
-
-
-