| 
6
 | 
     1 #!/usr/bin/env perl
 | 
| 
9
 | 
     2 
 | 
| 
6
 | 
     3 =head1 LICENSE
 | 
| 
 | 
     4 
 | 
| 
 | 
     5 Strelka Workflow Software
 | 
| 
 | 
     6 Copyright (c) 2009-2013 Illumina, Inc.
 | 
| 
 | 
     7 
 | 
| 
 | 
     8 This software is provided under the terms and conditions of the
 | 
| 
 | 
     9 Illumina Open Source Software License 1.
 | 
| 
 | 
    10 
 | 
| 
 | 
    11 You should have received a copy of the Illumina Open Source
 | 
| 
 | 
    12 Software License 1 along with this program. If not, see
 | 
| 
 | 
    13 <https://github.com/downloads/sequencing/licenses/>.
 | 
| 
 | 
    14 
 | 
| 
 | 
    15 =head1 SYNOPSIS
 | 
| 
 | 
    16 
 | 
| 
 | 
    17 callSomaticVariants.pl [options] | --help
 | 
| 
 | 
    18 
 | 
| 
 | 
    19 =head2 SUMMARY
 | 
| 
 | 
    20 
 | 
| 
 | 
    21 Run the somatic variant caller for snvs and indels on a single
 | 
| 
 | 
    22 chromosome bin.
 | 
| 
 | 
    23 
 | 
| 
 | 
    24 =cut
 | 
| 
 | 
    25 
 | 
| 
 | 
    26 use warnings FATAL => 'all';
 | 
| 
 | 
    27 use strict;
 | 
| 
 | 
    28 
 | 
| 
 | 
    29 use Carp;
 | 
| 
 | 
    30 $SIG{__DIE__} = \&Carp::confess;
 | 
| 
 | 
    31 
 | 
| 
 | 
    32 use File::Spec;
 | 
| 
 | 
    33 use Getopt::Long;
 | 
| 
 | 
    34 use Pod::Usage;
 | 
| 
 | 
    35 
 | 
| 
 | 
    36 my $baseDir;
 | 
| 
 | 
    37 my $libDir;
 | 
| 
 | 
    38 BEGIN {
 | 
| 
 | 
    39 
 | 
| 
 | 
    40     my $thisDir=(File::Spec->splitpath($0))[1];
 | 
| 
 | 
    41     $baseDir=File::Spec->catdir($thisDir,File::Spec->updir());
 | 
| 
 | 
    42     $libDir=File::Spec->catdir($baseDir,'lib');
 | 
| 
 | 
    43 }
 | 
| 
 | 
    44 use lib $libDir;
 | 
| 
 | 
    45 use Utils;
 | 
| 
9
 | 
    46 print "all imported call";
 | 
| 
6
 | 
    47 
 | 
| 
 | 
    48 if(getAbsPath($baseDir)) {
 | 
| 
 | 
    49     errorX("Can't resolve path for strelka_workflow install directory: '$baseDir'");
 | 
| 
 | 
    50 }
 | 
| 
 | 
    51 my $libexecDir=File::Spec->catdir($baseDir,'libexec');
 | 
| 
 | 
    52 
 | 
| 
 | 
    53 
 | 
| 
 | 
    54 my $scriptName=(File::Spec->splitpath($0))[2];
 | 
| 
 | 
    55 my $argCount=scalar(@ARGV);
 | 
| 
 | 
    56 my $cmdline = join(' ',$0,@ARGV);
 | 
| 
 | 
    57 
 | 
| 
 | 
    58 
 | 
| 
 | 
    59 my ($chrom, $binId, $configFile);
 | 
| 
 | 
    60 my $help;
 | 
| 
 | 
    61 
 | 
| 
 | 
    62 GetOptions(
 | 
| 
 | 
    63             "chrom=s" => \$chrom,
 | 
| 
 | 
    64             "bin=s" => \$binId,
 | 
| 
 | 
    65             "config=s" => \$configFile,
 | 
| 
 | 
    66             "help|h" => \$help) or pod2usage(2);
 | 
| 
 | 
    67 
 | 
| 
 | 
    68 pod2usage(2) if($help);
 | 
| 
 | 
    69 pod2usage(2) unless(defined($chrom));
 | 
| 
 | 
    70 pod2usage(2) unless(defined($binId));
 | 
| 
 | 
    71 pod2usage(2) unless(defined($configFile));
 | 
| 
 | 
    72 
 | 
| 
 | 
    73 
 | 
| 
 | 
    74 
 | 
| 
 | 
    75 #
 | 
| 
 | 
    76 # check all fixed paths (not based on commandline arguments):
 | 
| 
 | 
    77 #
 | 
| 
 | 
    78 checkDir($baseDir);
 | 
| 
 | 
    79 checkDir($libexecDir);
 | 
| 
 | 
    80 
 | 
| 
 | 
    81 my $strelkaBin=File::Spec->catdir($libexecDir,'strelka2');
 | 
| 
 | 
    82 checkFile($strelkaBin,"strelka binary");
 | 
| 
 | 
    83 
 | 
| 
 | 
    84 
 | 
| 
 | 
    85 
 | 
| 
 | 
    86 #
 | 
| 
 | 
    87 # read config and validate values
 | 
| 
 | 
    88 #
 | 
| 
 | 
    89 checkFile($configFile,"configuration ini");
 | 
| 
 | 
    90 my $config  = parseConfigIni($configFile);
 | 
| 
 | 
    91 
 | 
| 
 | 
    92 for (qw(knownGenomeSize tumorBam normalBam refFile outDir)) {
 | 
| 
 | 
    93     errorX("Undefined configuration option: '$_'") unless(defined($config->{derived}{$_}));
 | 
| 
 | 
    94 }
 | 
| 
 | 
    95 
 | 
| 
 | 
    96 # we skip maxInputDepth,minTier2Mapq here to allow older config files
 | 
| 
 | 
    97 for (qw(minTier1Mapq isWriteRealignedBam binSize ssnvPrior sindelPrior
 | 
| 
 | 
    98         ssnvNoise sindelNoise ssnvNoiseStrandBiasFrac)) {
 | 
| 
 | 
    99     errorX("Undefined configuration option: '$_'") unless(defined($config->{user}{$_}));
 | 
| 
 | 
   100 }
 | 
| 
 | 
   101 
 | 
| 
 | 
   102 my $outDir = $config->{derived}{outDir};
 | 
| 
 | 
   103 my $binDir = File::Spec->catdir($outDir,'chromosomes',$chrom,'bins',$binId);
 | 
| 
 | 
   104 checkDir($outDir,"output");
 | 
| 
 | 
   105 checkDir($binDir,"output bin");
 | 
| 
 | 
   106 
 | 
| 
 | 
   107 
 | 
| 
 | 
   108 my $tumorBam = $config->{derived}{tumorBam};
 | 
| 
 | 
   109 my $normalBam = $config->{derived}{normalBam};
 | 
| 
 | 
   110 my $refFile = $config->{derived}{refFile};
 | 
| 
 | 
   111 checkFile($tumorBam,"tumor BAM");
 | 
| 
 | 
   112 checkFile($normalBam,"normal BAM");
 | 
| 
 | 
   113 checkFile($refFile,"reference");
 | 
| 
 | 
   114 
 | 
| 
 | 
   115 
 | 
| 
 | 
   116 # pull out some config options for convenience:
 | 
| 
 | 
   117 my $binSize=$config->{user}{binSize};
 | 
| 
 | 
   118 my $isWriteRealignedBam=$config->{user}{isWriteRealignedBam};
 | 
| 
 | 
   119 my $knownGenomeSize = $config->{derived}{knownGenomeSize};
 | 
| 
 | 
   120 
 | 
| 
 | 
   121 
 | 
| 
 | 
   122 my $begin = (int($binId)*$binSize)+1;
 | 
| 
 | 
   123 my $end = ((int($binId)+1)*$binSize);
 | 
| 
 | 
   124 #my $end = $begin+100000;  #debug mode
 | 
| 
 | 
   125 
 | 
| 
 | 
   126 my $useroptions = $config->{user};
 | 
| 
 | 
   127 
 | 
| 
 | 
   128 # set previous default value if an older config file is being used:
 | 
| 
 | 
   129 if(! defined($useroptions->{minTier2Mapq})) {
 | 
| 
 | 
   130     $useroptions->{minTier2Mapq} = 5;
 | 
| 
 | 
   131 }
 | 
| 
 | 
   132 
 | 
| 
 | 
   133 
 | 
| 
 | 
   134 #
 | 
| 
 | 
   135 # setup the strelka command-line:
 | 
| 
 | 
   136 #
 | 
| 
 | 
   137 my $strelka_base_opts= "-clobber" .
 | 
| 
 | 
   138 " -filter-unanchored" .
 | 
| 
 | 
   139 " -min-paired-align-score " . $useroptions->{minTier1Mapq} .
 | 
| 
 | 
   140 " -min-single-align-score 10" .
 | 
| 
 | 
   141 " -min-qscore 0" .
 | 
| 
 | 
   142 " -report-range-begin $begin -report-range-end $end" .
 | 
| 
 | 
   143 " -samtools-reference '$refFile'" .
 | 
| 
 | 
   144 " -max-window-mismatch 3 20 -print-used-allele-counts" .
 | 
| 
 | 
   145 " -bam-seq-name '" . $chrom . "'" .
 | 
| 
 | 
   146 " -genome-size $knownGenomeSize" .
 | 
| 
 | 
   147 " -max-indel-size 50" .
 | 
| 
 | 
   148 " -indel-nonsite-match-prob 0.5" .
 | 
| 
 | 
   149 " --min-contig-open-end-support 35" .
 | 
| 
 | 
   150 " --somatic-snv-rate " . $useroptions->{ssnvPrior} .
 | 
| 
 | 
   151 " --shared-site-error-rate " . $useroptions->{ssnvNoise} .
 | 
| 
 | 
   152 " --shared-site-error-strand-bias-fraction " . $useroptions->{ssnvNoiseStrandBiasFrac} .
 | 
| 
 | 
   153 " --somatic-indel-rate " . $useroptions->{sindelPrior} .
 | 
| 
 | 
   154 " --shared-indel-error-rate " . $useroptions->{sindelNoise} .
 | 
| 
 | 
   155 " --tier2-min-single-align-score " . $useroptions->{minTier2Mapq} .
 | 
| 
 | 
   156 " --tier2-min-paired-align-score " . $useroptions->{minTier2Mapq} .
 | 
| 
 | 
   157 " --tier2-single-align-score-rescue-mode" .
 | 
| 
 | 
   158 " --tier2-mismatch-density-filter-count 10" .
 | 
| 
 | 
   159 " --tier2-no-filter-unanchored" .
 | 
| 
 | 
   160 " --tier2-indel-nonsite-match-prob 0.25" .
 | 
| 
 | 
   161 " --tier2-include-singleton" .
 | 
| 
 | 
   162 " --tier2-include-anomalous";
 | 
| 
 | 
   163 
 | 
| 
 | 
   164 
 | 
| 
 | 
   165 my $somSnvFile='somatic.snvs.unfiltered.vcf';
 | 
| 
 | 
   166 my $somIndelFile='somatic.indels.unfiltered.vcf';
 | 
| 
 | 
   167 
 | 
| 
 | 
   168 my $cmd =  "$strelkaBin $strelka_base_opts" .
 | 
| 
 | 
   169 " -bam-file " . $normalBam .
 | 
| 
 | 
   170 " --tumor-bam-file " . $tumorBam .
 | 
| 
 | 
   171 " --somatic-snv-file " . File::Spec->catfile($binDir,$somSnvFile) .
 | 
| 
 | 
   172 " --somatic-indel-file " . File::Spec->catfile($binDir,$somIndelFile) .
 | 
| 
 | 
   173 " --variant-window-flank-file 50 " . File::Spec->catfile($binDir,$somIndelFile . '.window');
 | 
| 
 | 
   174 
 | 
| 
 | 
   175 
 | 
| 
 | 
   176 sub ualignFile($) {
 | 
| 
 | 
   177     return File::Spec->catfile($binDir,$_[0] . ".unsorted.realigned.bam");
 | 
| 
 | 
   178 }
 | 
| 
 | 
   179 sub alignFile($) {
 | 
| 
 | 
   180     return File::Spec->catfile($binDir,$_[0] . ".realigned");
 | 
| 
 | 
   181 }
 | 
| 
 | 
   182 
 | 
| 
 | 
   183 
 | 
| 
 | 
   184 if(exists($useroptions->{maxInputDepth}) && ($useroptions->{maxInputDepth} > 0)) {
 | 
| 
 | 
   185     $cmd .= " --max-input-depth " . $useroptions->{maxInputDepth};
 | 
| 
 | 
   186 }
 | 
| 
 | 
   187 
 | 
| 
 | 
   188 
 | 
| 
 | 
   189 if($isWriteRealignedBam) {
 | 
| 
 | 
   190     $cmd .= " -realigned-read-file " . ualignFile("normal") .
 | 
| 
 | 
   191             " --tumor-realigned-read-file " . ualignFile("tumor");
 | 
| 
 | 
   192 }
 | 
| 
 | 
   193 
 | 
| 
 | 
   194 if(defined($useroptions->{extraStrelkaArguments})){
 | 
| 
 | 
   195     my $arg=$useroptions->{extraStrelkaArguments};
 | 
| 
 | 
   196     if($arg !~ /^\s*$/) {
 | 
| 
 | 
   197         $cmd .= " " . $arg;
 | 
| 
 | 
   198     }
 | 
| 
 | 
   199 }
 | 
| 
 | 
   200 
 | 
| 
 | 
   201 # this file contains site stats that used to be printed on stdout:
 | 
| 
 | 
   202 #
 | 
| 
 | 
   203 $cmd .=
 | 
| 
 | 
   204 " --report-file " . File::Spec->catfile($binDir,'strelka.stats');
 | 
| 
 | 
   205 
 | 
| 
 | 
   206 $cmd .=
 | 
| 
 | 
   207 " >| " . File::Spec->catfile($binDir,'strelka.stdout') .
 | 
| 
 | 
   208 " 2>| " . File::Spec->catfile($binDir,'strelka.stderr');
 | 
| 
 | 
   209 
 | 
| 
 | 
   210 
 | 
| 
 | 
   211 executeCmd($cmd,0);
 | 
| 
 | 
   212 
 | 
| 
 | 
   213 
 | 
| 
 | 
   214 if($isWriteRealignedBam) {
 | 
| 
 | 
   215     for my $label (qw(normal tumor)) {
 | 
| 
 | 
   216         my $ufile = ualignFile($label);
 | 
| 
 | 
   217         if( -f $ufile ) {
 | 
| 
 | 
   218             my $afile = alignFile($label);
 | 
| 
 | 
   219             my $cmd = "samtools sort " . $ufile .  " " . $afile;
 | 
| 
 | 
   220             executeCmd($cmd,0);
 | 
| 
 | 
   221             unlink($ufile);
 | 
| 
 | 
   222         } else {
 | 
| 
 | 
   223             logX("Can't find unsorted realigned BAM file: '$ufile'");
 | 
| 
 | 
   224         }
 | 
| 
 | 
   225     }
 | 
| 
 | 
   226 }
 | 
| 
 | 
   227 
 | 
| 
 | 
   228 
 | 
| 
 | 
   229 1;
 | 
| 
 | 
   230 
 | 
| 
 | 
   231 __END__
 | 
| 
 | 
   232 
 | 
| 
 | 
   233 
 |