comparison strelka2/libexec/consolidateResults.pl @ 0:7a9f20ca4ad5

Uploaded
author mini
date Thu, 25 Sep 2014 11:59:08 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:7a9f20ca4ad5
1 #!/usr/bin/env perl
2
3 =head1 LICENSE
4
5 Strelka Workflow Software
6 Copyright (c) 2009-2013 Illumina, Inc.
7
8 This software is provided under the terms and conditions of the
9 Illumina Open Source Software License 1.
10
11 You should have received a copy of the Illumina Open Source
12 Software License 1 along with this program. If not, see
13 <https://github.com/downloads/sequencing/licenses/>.
14
15 =head1 SYNOPSIS
16
17 consolidateSomaticVariants.pl [options] | --help
18
19 =head2 SUMMARY
20
21 Aggregate final results from all chromosomes
22
23 =cut
24
25 use warnings FATAL => 'all';
26 use strict;
27
28 use Carp;
29 $SIG{__DIE__} = \&Carp::confess;
30
31 use File::Spec;
32 use File::Temp;
33 use Getopt::Long;
34 use Pod::Usage;
35
36 my $baseDir;
37 my $libDir;
38 #my $optDir;
39 #my $vcftDir;
40 BEGIN {
41 my $thisDir=(File::Spec->splitpath($0))[1];
42 $baseDir=File::Spec->catdir($thisDir,File::Spec->updir());
43 $libDir=File::Spec->catdir($baseDir,'lib');
44 #$optDir=File::Spec->catdir($baseDir,'opt');
45 #$vcftDir=File::Spec->catdir($optDir,'vcftools','lib','perl5','site_perl');
46 }
47 use lib $libDir;
48 use Utils;
49 #use lib $vcftDir;
50 use Vcf;
51
52 if(getAbsPath($baseDir)) {
53 errorX("Can't resolve path for strelka_workflow install directory: '$baseDir'");
54 }
55 #$optDir=File::Spec->catdir($baseDir,'opt');
56
57
58 my $scriptName=(File::Spec->splitpath($0))[2];
59 my $argCount=scalar(@ARGV);
60 my $cmdline=join(' ',$0,@ARGV);
61
62
63 my $configFile;
64 my $help;
65
66 GetOptions( "config=s" => \$configFile,
67 "help|h" => \$help) or pod2usage(2);
68
69 pod2usage(2) if($help);
70 pod2usage(2) unless(defined($configFile));
71
72 #
73 # check fixed paths
74 #
75 #my $samtoolsBin = File::Spec->catfile($optDir,'samtools','samtools');
76 #checkFile($samtoolsBin,"samtools binary");
77
78
79 #
80 # read config and validate values
81 #
82 checkFile($configFile,"configuration ini");
83 my $config = parseConfigIni($configFile);
84
85
86 for (qw(outDir chromOrder)) {
87 errorX("Undefined configuration option: '$_'") unless(defined($config->{derived}{$_}));
88 }
89 for (qw(isWriteRealignedBam binSize)) {
90 errorX("Undefined configuration option: '$_'") unless(defined($config->{user}{$_}));
91 }
92
93 my $userconfig = $config->{user};
94
95 my @chromOrder = split(/\t/,$config->{derived}{chromOrder});
96 for my $chrom (@chromOrder) {
97 my $chromSizeKey = "chrom_" . $chrom . "_size";
98 errorX("Undefined configuration option: '$_'") unless(defined($chromSizeKey));
99 }
100
101 my $outDir = $config->{derived}{outDir};
102 checkDir($outDir,"output");
103
104
105 my $isWriteRealignedBam = $userconfig->{isWriteRealignedBam};
106
107 for my $chrom (@chromOrder) {
108 my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom);
109 checkDir($chromDir,"input chromosome");
110
111 next unless($isWriteRealignedBam);
112 my $chromSizeKey = "chrom_" . $chrom . "_size";
113 my $binList = getBinList($config->{derived}{$chromSizeKey},$userconfig->{binSize});
114 for my $binId (@$binList) {
115 my $dir = File::Spec->catdir($chromDir,'bins',$binId);
116 checkDir($dir,"input bin");
117 }
118 }
119
120
121
122 # suffix used for large result file intermediates:
123 my $itag = ".incomplete";
124
125
126 #
127 # concatenate vcfs:
128 #
129 sub concatenateVcfs($) {
130 my $fileName = shift;
131
132 my $is_first = 1;
133
134 my $allFileName = "all." . $fileName;
135 my $allFile = File::Spec->catfile($outDir,'results',$allFileName . $itag);
136 open(my $aFH,'>',"$allFile")
137 || errorX("Failed to open file: '$allFile'");
138
139 # loop over all chroms once to create the header, and one more time for all the data:
140 my $headervcf;
141 for my $chrom (@chromOrder) {
142 my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom);
143 my $iFile = File::Spec->catfile($chromDir,$fileName);
144 checkFile($iFile);
145
146 my $depthKey="maxDepth_${chrom}";
147
148 if($is_first) {
149 open(my $iFH,'<',"$iFile")
150 || errorX("Failed to open file: '$iFile'");
151 $headervcf = Vcf->new(fh=>$iFH);
152 $headervcf->parse_header();
153 $headervcf->remove_header_line(key=>"cmdline");
154 $headervcf->add_header_line({key=>"cmdline",value=>$cmdline});
155 $headervcf->remove_header_line(key=>"$depthKey");
156 close($iFH);
157 $is_first=0;
158 }
159
160 {
161 open(my $iFH,'<',"$iFile")
162 || errorX("Failed to open file: '$iFile'");
163 my $vcf = Vcf->new(fh=>$iFH);
164 $vcf->parse_header();
165 for my $line (@{$vcf->get_header_line(key=>"$depthKey")}) {
166 # $line seems to be returned as a length 1 array ref to a hash -- ??!?!??!!
167 $headervcf->add_header_line($line->[0]);
168 }
169 $vcf->close();
170 close($iFH);
171 }
172 }
173 print $aFH $headervcf->format_header();
174 $headervcf->close();
175
176 for my $chrom (@chromOrder) {
177 my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom);
178 my $iFile = File::Spec->catfile($chromDir,$fileName);
179
180 open(my $iFH,'<',"$iFile")
181 || errorX("Failed to open file: '$iFile'");
182
183 my $vcf = Vcf->new(fh=>$iFH);
184 $vcf->parse_header();
185 print $aFH $_ while(<$iFH>);
186 }
187
188 close($aFH);
189
190 # make a second set of files with only the passed variants:
191 my $passedFileName = "passed." . $fileName;
192 my $passedFile = File::Spec->catfile($outDir,'results',$passedFileName . $itag);
193 open(my $pFH,'>',"$passedFile")
194 || errorX("Failed to open file: '$passedFile'");
195
196 open(my $arFH,'<',"$allFile")
197 || errorX("Failed to open file: '$allFile'");
198
199 while(<$arFH>) {
200 chomp;
201 unless(/^#/) {
202 my @F = split(/\t/);
203 next if((scalar(@F)>=7) && ($F[6] ne "PASS"));
204 }
205 print $pFH "$_\n";
206 }
207
208 close($arFH);
209 close($pFH);
210
211 my $allFileFinished = File::Spec->catfile($outDir,'results',$allFileName);
212 checkMove($allFile,$allFileFinished);
213
214 my $passedFileFinished = File::Spec->catfile($outDir,'results',$passedFileName);
215 checkMove($passedFile,$passedFileFinished);
216 }
217
218 concatenateVcfs("somatic.snvs.vcf");
219 concatenateVcfs("somatic.indels.vcf");
220
221
222 my $bamSuffix = ".realigned.bam";
223
224 sub consolidateBam($) {
225 my $label = shift;
226
227 my $fileName = $label . $bamSuffix;
228
229 my $reDir = File::Spec->catdir($outDir,'realigned');
230 checkMakeDir($reDir);
231
232 my @bamList;
233 for my $chrom (@chromOrder) {
234 my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom);
235
236 my $chromSizeKey = "chrom_" . $chrom . "_size";
237 my $binList = getBinList($config->{derived}{$chromSizeKey},$userconfig->{binSize});
238 for my $binId (@$binList) {
239 my $binDir = File::Spec->catdir($chromDir,'bins',$binId);
240 my $rbamFile = File::Spec->catfile($binDir,$fileName);
241 checkFile($rbamFile,"bin realigned bam file");
242
243 push @bamList,$rbamFile;
244 }
245 }
246
247 return unless(scalar(@bamList));
248
249 my $headerFH = File::Temp->new();
250 my $getHeaderCmd = "bash -c '$samtoolsBin view -H ".$bamList[0]." > $headerFH'";
251 executeCmd($getHeaderCmd);
252
253 my $allFile = File::Spec->catfile($reDir,$fileName . $itag);
254 my $cmd="$samtoolsBin merge -h $headerFH $allFile ". join(" ",@bamList);
255 executeCmd($cmd);
256
257 my $allFileFinished = File::Spec->catfile($reDir,$fileName);
258 checkMove($allFile,$allFileFinished);
259
260 my $indexCmd="$samtoolsBin index $allFileFinished";
261 executeCmd($indexCmd);
262
263 # for now don't remove all the bin realignments...
264 # unlink(@bamList);
265 }
266
267 if($isWriteRealignedBam) {
268 consolidateBam("normal");
269 consolidateBam("tumor");
270 }
271
272
273 1;
274
275 __END__
276