6
|
1 #!/usr/bin/env perl
|
|
2
|
|
3 =head1 LICENSE
|
|
4
|
|
5 Strelka Workflow Software
|
|
6 Copyright (c) 2009-2013 Illumina, Inc.
|
|
7
|
|
8 This software is provided under the terms and conditions of the
|
|
9 Illumina Open Source Software License 1.
|
|
10
|
|
11 You should have received a copy of the Illumina Open Source
|
|
12 Software License 1 along with this program. If not, see
|
|
13 <https://github.com/downloads/sequencing/licenses/>.
|
|
14
|
|
15 =head1 SYNOPSIS
|
|
16
|
|
17 consolidateSomaticVariants.pl [options] | --help
|
|
18
|
|
19 =head2 SUMMARY
|
|
20
|
|
21 Aggregate final results from all chromosomes
|
|
22
|
|
23 =cut
|
|
24
|
|
25 use warnings FATAL => 'all';
|
|
26 use strict;
|
|
27
|
|
28 use Carp;
|
|
29 $SIG{__DIE__} = \&Carp::confess;
|
|
30
|
|
31 use File::Spec;
|
|
32 use File::Temp;
|
|
33 use Getopt::Long;
|
|
34 use Pod::Usage;
|
|
35
|
|
36 my $baseDir;
|
|
37 my $libDir;
|
|
38 #my $optDir;
|
|
39 #my $vcftDir;
|
|
40 BEGIN {
|
|
41 my $thisDir=(File::Spec->splitpath($0))[1];
|
|
42 $baseDir=File::Spec->catdir($thisDir,File::Spec->updir());
|
|
43 $libDir=File::Spec->catdir($baseDir,'lib');
|
|
44 #$optDir=File::Spec->catdir($baseDir,'opt');
|
|
45 #$vcftDir=File::Spec->catdir($optDir,'vcftools','lib','perl5','site_perl');
|
|
46 }
|
|
47 use lib $libDir;
|
|
48 use Utils;
|
|
49 #use lib $vcftDir;
|
|
50 use Vcf;
|
|
51
|
|
52 if(getAbsPath($baseDir)) {
|
|
53 errorX("Can't resolve path for strelka_workflow install directory: '$baseDir'");
|
|
54 }
|
|
55 #$optDir=File::Spec->catdir($baseDir,'opt');
|
|
56
|
|
57
|
|
58 my $scriptName=(File::Spec->splitpath($0))[2];
|
|
59 my $argCount=scalar(@ARGV);
|
|
60 my $cmdline=join(' ',$0,@ARGV);
|
|
61
|
|
62
|
|
63 my $configFile;
|
|
64 my $help;
|
|
65
|
|
66 GetOptions( "config=s" => \$configFile,
|
|
67 "help|h" => \$help) or pod2usage(2);
|
|
68
|
|
69 pod2usage(2) if($help);
|
|
70 pod2usage(2) unless(defined($configFile));
|
|
71
|
|
72 #
|
|
73 # check fixed paths
|
|
74 #
|
|
75 #my $samtoolsBin = File::Spec->catfile($optDir,'samtools','samtools');
|
|
76 #checkFile($samtoolsBin,"samtools binary");
|
|
77
|
|
78
|
|
79 #
|
|
80 # read config and validate values
|
|
81 #
|
|
82 checkFile($configFile,"configuration ini");
|
|
83 my $config = parseConfigIni($configFile);
|
|
84
|
|
85
|
|
86 for (qw(outDir chromOrder)) {
|
|
87 errorX("Undefined configuration option: '$_'") unless(defined($config->{derived}{$_}));
|
|
88 }
|
|
89 for (qw(isWriteRealignedBam binSize)) {
|
|
90 errorX("Undefined configuration option: '$_'") unless(defined($config->{user}{$_}));
|
|
91 }
|
|
92
|
|
93 my $userconfig = $config->{user};
|
|
94
|
|
95 my @chromOrder = split(/\t/,$config->{derived}{chromOrder});
|
|
96 for my $chrom (@chromOrder) {
|
|
97 my $chromSizeKey = "chrom_" . $chrom . "_size";
|
|
98 errorX("Undefined configuration option: '$_'") unless(defined($chromSizeKey));
|
|
99 }
|
|
100
|
|
101 my $outDir = $config->{derived}{outDir};
|
|
102 checkDir($outDir,"output");
|
|
103
|
|
104
|
|
105 my $isWriteRealignedBam = $userconfig->{isWriteRealignedBam};
|
|
106
|
|
107 for my $chrom (@chromOrder) {
|
|
108 my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom);
|
|
109 checkDir($chromDir,"input chromosome");
|
|
110
|
|
111 next unless($isWriteRealignedBam);
|
|
112 my $chromSizeKey = "chrom_" . $chrom . "_size";
|
|
113 my $binList = getBinList($config->{derived}{$chromSizeKey},$userconfig->{binSize});
|
|
114 for my $binId (@$binList) {
|
|
115 my $dir = File::Spec->catdir($chromDir,'bins',$binId);
|
|
116 checkDir($dir,"input bin");
|
|
117 }
|
|
118 }
|
|
119
|
|
120
|
|
121
|
|
122 # suffix used for large result file intermediates:
|
|
123 my $itag = ".incomplete";
|
|
124
|
|
125
|
|
126 #
|
|
127 # concatenate vcfs:
|
|
128 #
|
|
129 sub concatenateVcfs($) {
|
|
130 my $fileName = shift;
|
|
131
|
|
132 my $is_first = 1;
|
|
133
|
|
134 my $allFileName = "all." . $fileName;
|
|
135 my $allFile = File::Spec->catfile($outDir,'results',$allFileName . $itag);
|
|
136 open(my $aFH,'>',"$allFile")
|
|
137 || errorX("Failed to open file: '$allFile'");
|
|
138
|
|
139 # loop over all chroms once to create the header, and one more time for all the data:
|
|
140 my $headervcf;
|
|
141 for my $chrom (@chromOrder) {
|
|
142 my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom);
|
|
143 my $iFile = File::Spec->catfile($chromDir,$fileName);
|
|
144 checkFile($iFile);
|
|
145
|
|
146 my $depthKey="maxDepth_${chrom}";
|
|
147
|
|
148 if($is_first) {
|
|
149 open(my $iFH,'<',"$iFile")
|
|
150 || errorX("Failed to open file: '$iFile'");
|
|
151 $headervcf = Vcf->new(fh=>$iFH);
|
|
152 $headervcf->parse_header();
|
|
153 $headervcf->remove_header_line(key=>"cmdline");
|
|
154 $headervcf->add_header_line({key=>"cmdline",value=>$cmdline});
|
|
155 $headervcf->remove_header_line(key=>"$depthKey");
|
|
156 close($iFH);
|
|
157 $is_first=0;
|
|
158 }
|
|
159
|
|
160 {
|
|
161 open(my $iFH,'<',"$iFile")
|
|
162 || errorX("Failed to open file: '$iFile'");
|
|
163 my $vcf = Vcf->new(fh=>$iFH);
|
|
164 $vcf->parse_header();
|
|
165 for my $line (@{$vcf->get_header_line(key=>"$depthKey")}) {
|
|
166 # $line seems to be returned as a length 1 array ref to a hash -- ??!?!??!!
|
|
167 $headervcf->add_header_line($line->[0]);
|
|
168 }
|
|
169 $vcf->close();
|
|
170 close($iFH);
|
|
171 }
|
|
172 }
|
|
173 print $aFH $headervcf->format_header();
|
|
174 $headervcf->close();
|
|
175
|
|
176 for my $chrom (@chromOrder) {
|
|
177 my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom);
|
|
178 my $iFile = File::Spec->catfile($chromDir,$fileName);
|
|
179
|
|
180 open(my $iFH,'<',"$iFile")
|
|
181 || errorX("Failed to open file: '$iFile'");
|
|
182
|
|
183 my $vcf = Vcf->new(fh=>$iFH);
|
|
184 $vcf->parse_header();
|
|
185 print $aFH $_ while(<$iFH>);
|
|
186 }
|
|
187
|
|
188 close($aFH);
|
|
189
|
|
190 # make a second set of files with only the passed variants:
|
|
191 my $passedFileName = "passed." . $fileName;
|
|
192 my $passedFile = File::Spec->catfile($outDir,'results',$passedFileName . $itag);
|
|
193 open(my $pFH,'>',"$passedFile")
|
|
194 || errorX("Failed to open file: '$passedFile'");
|
|
195
|
|
196 open(my $arFH,'<',"$allFile")
|
|
197 || errorX("Failed to open file: '$allFile'");
|
|
198
|
|
199 while(<$arFH>) {
|
|
200 chomp;
|
|
201 unless(/^#/) {
|
|
202 my @F = split(/\t/);
|
|
203 next if((scalar(@F)>=7) && ($F[6] ne "PASS"));
|
|
204 }
|
|
205 print $pFH "$_\n";
|
|
206 }
|
|
207
|
|
208 close($arFH);
|
|
209 close($pFH);
|
|
210
|
|
211 my $allFileFinished = File::Spec->catfile($outDir,'results',$allFileName);
|
|
212 checkMove($allFile,$allFileFinished);
|
|
213
|
|
214 my $passedFileFinished = File::Spec->catfile($outDir,'results',$passedFileName);
|
|
215 checkMove($passedFile,$passedFileFinished);
|
|
216 }
|
|
217
|
|
218 concatenateVcfs("somatic.snvs.vcf");
|
|
219 concatenateVcfs("somatic.indels.vcf");
|
|
220
|
|
221
|
|
222 my $bamSuffix = ".realigned.bam";
|
|
223
|
|
224 sub consolidateBam($) {
|
|
225 my $label = shift;
|
|
226
|
|
227 my $fileName = $label . $bamSuffix;
|
|
228
|
|
229 my $reDir = File::Spec->catdir($outDir,'realigned');
|
|
230 checkMakeDir($reDir);
|
|
231
|
|
232 my @bamList;
|
|
233 for my $chrom (@chromOrder) {
|
|
234 my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom);
|
|
235
|
|
236 my $chromSizeKey = "chrom_" . $chrom . "_size";
|
|
237 my $binList = getBinList($config->{derived}{$chromSizeKey},$userconfig->{binSize});
|
|
238 for my $binId (@$binList) {
|
|
239 my $binDir = File::Spec->catdir($chromDir,'bins',$binId);
|
|
240 my $rbamFile = File::Spec->catfile($binDir,$fileName);
|
|
241 checkFile($rbamFile,"bin realigned bam file");
|
|
242
|
|
243 push @bamList,$rbamFile;
|
|
244 }
|
|
245 }
|
|
246
|
|
247 return unless(scalar(@bamList));
|
|
248
|
|
249 my $headerFH = File::Temp->new();
|
|
250 my $getHeaderCmd = "bash -c '$samtoolsBin view -H ".$bamList[0]." > $headerFH'";
|
|
251 executeCmd($getHeaderCmd);
|
|
252
|
|
253 my $allFile = File::Spec->catfile($reDir,$fileName . $itag);
|
|
254 my $cmd="$samtoolsBin merge -h $headerFH $allFile ". join(" ",@bamList);
|
|
255 executeCmd($cmd);
|
|
256
|
|
257 my $allFileFinished = File::Spec->catfile($reDir,$fileName);
|
|
258 checkMove($allFile,$allFileFinished);
|
|
259
|
|
260 my $indexCmd="$samtoolsBin index $allFileFinished";
|
|
261 executeCmd($indexCmd);
|
|
262
|
|
263 # for now don't remove all the bin realignments...
|
|
264 # unlink(@bamList);
|
|
265 }
|
|
266
|
|
267 if($isWriteRealignedBam) {
|
|
268 consolidateBam("normal");
|
|
269 consolidateBam("tumor");
|
|
270 }
|
|
271
|
|
272
|
|
273 1;
|
|
274
|
|
275 __END__
|
|
276
|