comparison varscan/varscan_processSomatic.pl @ 0:848f3dc54593 draft

Uploaded
author geert-vandeweyer
date Fri, 07 Mar 2014 06:17:32 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:848f3dc54593
1 #!/usr/bin/perl
2
3
4 use strict;
5 use Cwd;
6
7 die qq(
8 Bad numbr of inputs
9
10 ) if(!@ARGV);
11
12 my $options ="";
13 my $command="";
14 my $input="";
15 #my $output="";
16 my $working_dir = cwd();
17 my $log = '';
18 my %outfiles;
19 foreach my $arg (@ARGV)
20 {
21 my @tmp = split "::", $arg;
22 if($tmp[0] eq "COMMAND")
23 {
24 $command = $tmp[1];
25 }
26 elsif($tmp[0] eq "INPUT")
27 {
28 $input = $tmp[1];
29 }
30 elsif($tmp[0] eq "OPTION")
31 {
32 my @p = split(/\s+/,$tmp[1]);
33 $options = "$options ${tmp[1]}";
34 }
35 elsif($tmp[0] eq "OUTPUT")
36 {
37 my @p = split(/\s+/,$tmp[1]);
38 $p[0] = substr($p[0],2);
39 $outfiles{$p[0]} = $p[1];
40 }
41
42 elsif($tmp[0] eq "LOG")
43 {
44 $log = $tmp[1];
45 }
46 else
47 {
48 die("Unknown Input: $input\n");
49 }
50 }
51 system("ln -s $input $working_dir/in.dat");
52
53
54
55 ## RUN
56 $command = "$command $working_dir/in.dat $options > $log 2>&1";
57 system($command);
58
59 ## if tabular files are kept, write them to galaxy-datafile
60 if ($outfiles{'loh'} ne 'None') {
61 ##
62 system("cp '$working_dir/in.dat.LOH' '".$outfiles{'loh'}."'");
63 system("cp '$working_dir/in.dat.LOH.hc' '".$outfiles{'loh'}."'");
64 system("cp '$working_dir/in.dat.Germline' '".$outfiles{'germ'}."'");
65 system("cp '$working_dir/in.dat.Germline.hc' '".$outfiles{'germ_hc'}."'");
66 system("cp '$working_dir/in.dat.Somatic' '".$outfiles{'som'}."'");
67 system("cp '$working_dir/in.dat.Somatic.hc' '".$outfiles{'som_hc'}."'");
68 }
69 ## if vcf files are kept, generate them, and write to galaxy-datafile
70 if ($outfiles{'loh_hc_vcf'} ne 'None') {
71 tab2vcf($working_dir,'in.dat.LOH.hc',$outfiles{'loh_hc_vcf'});
72 tab2vcf($working_dir,'in.dat.Germline.hc',$outfiles{'germ_hc_vcf'});
73 tab2vcf($working_dir,'in.dat.Somatic.hc',$outfiles{'som_hc_vcf'});
74 }
75 exit;
76
77 sub tab2vcf
78 {
79 my $wd = shift;
80 my $in = shift;
81 my $out = shift;
82 open OUT, ">$out";
83 print OUT "##fileformat=VCFv4.1\n";
84 print OUT "##source=processSomatic\n";
85 print OUT '##INFO=<ID=DP,Number=1,Type=Integer,Description="Total depth of quality bases">'."\n";
86 print OUT '##INFO=<ID=SS,Number=1,Type=String,Description="Somatic status of variant (Germline,Somatic,LOH)">'."\n";
87 print OUT '##INFO=<ID=VPV,Number=1,Type=Float,Description="Variant P-value">'."\n";
88 print OUT '##INFO=<ID=SPV,Number=1,Type=Float,Description="Somatic Variant P-value">'."\n";
89 print OUT '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">'."\n";
90 print OUT '##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">'."\n";
91 print OUT '##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">'."\n";
92 print OUT '##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Depth of reference-supporting bases (reads1)">'."\n";
93 print OUT '##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Depth of variant-supporting bases (reads2)">'."\n";
94 print OUT '##FORMAT=<ID=FREQ,Number=1,Type=String,Description="Variant allele frequency">'."\n";
95 print OUT '##FORMAT=<ID=DP4,Number=1,Type=String,Description="Strand read counts: ref/fwd, ref/rev, var/fwd, var/rev">'."\n";
96 print OUT '#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL TUMOR'."\n";
97 open IN, "$wd/$in";
98 my $head = <IN>;
99 while (<IN>) {
100 chomp;
101 my @p = split(/\t/,$_);
102 my $td = $p[4] + $p[5] + $p[8] + $p[9];
103 my $vpv = sprintf('%.3E',$p[13]);
104 my $spv = sprintf('%.3E',$p[14]);
105 my $info = "DP=$td;SS=$p[12];VPV=$vpv;SPV=$spv";
106 ## gt normal string
107 my $gtnormal = '';
108 if ("$p[7]" eq "$p[2]") {
109 $gtnormal = "0/0";
110 }
111 elsif ("$p[7]" eq "$p[3]") {
112 $gtnormal = "1/1";
113 }
114 else {
115 $gtnormal = "0/1";
116 }
117 my $nsum = $p[4] + $p[5];
118 $gtnormal .= ":.:$nsum:$p[4]:$p[5]:$p[6]:$p[19],$p[20],$p[21],$p[22]";
119 ## gt tumor string
120 my $gttumor = '';
121 if ("$p[11]" eq "$p[2]") {
122 $gttumor = "0/0";
123 }
124 elsif ("$p[11]" eq "$p[3]") {
125 $gttumor = "1/1";
126 }
127 else {
128 $gttumor = "0/1";
129 }
130 my $tsum = $p[8] + $p[9];
131 $gttumor .= ":.:$tsum:$p[8]:$p[9]:$p[10]:$p[15],$p[16],$p[17],$p[18]";
132
133 ## outline
134 my $line = "$p[0]\t$p[1]\t.\t$p[2]\t$p[3]\t.\tPASS\t$info\tGT:GQ:DP:RD:AD:FREQ:DP4\t$gtnormal\t$gttumor\n";
135 print OUT $line;
136 }
137 close IN;
138 close OUT;
139 }
140 sub vs2vcf
141 {
142
143 #
144 # G l o b a l v a r i a b l e s
145 #
146 my $version = '0.1';
147
148 #
149 # Read in file
150 #
151 my $input = shift;
152 my $output = shift;
153 my $chr_ord = shift;
154 open(IN, $input) or die "Can't open $input': $!\n";
155 open(OUT, ">$output") or die "Can't create $output': $!\n";
156 my %output;
157
158 while ( <IN> )
159 {
160 if ( /^#/ )
161 {
162 print OUT;
163 next;
164 }
165 chomp;
166 my $line = $_;
167
168 my @flds = split ( "\t", $line );
169 my $ref = $flds[3];
170 my $alt = $flds[4];
171 #
172 # Deletion of bases
173 #
174 if ( $alt =~ /^\-/ )
175 {
176 ($flds[3], $flds[4]) = ($ref.substr($alt,1), $ref);
177 }
178
179 #
180 # Insertion of bases
181 #
182 if ( $alt =~ /^\+/ )
183 {
184 $flds[4] = $ref.substr($alt,1);
185 }
186 ## insert dot for reference positions.
187 if ($flds[4] eq '') {
188 $flds[4] = '.';
189 }
190 print OUT join( "\t", @flds),"\n" unless defined $chr_ord;
191 $output{$flds[0]}{$flds[1]} = join( "\t", @flds)."\n" if defined $chr_ord;
192 }
193 close(IN);
194 # if chromosome order given return in sorted order
195 if(defined $chr_ord)
196 {
197 for my $chrom (@{ $chr_ord })
198 {
199 for my $pos (sort {$a<=>$b} keys %{ $output{$chrom} })
200 {
201 print OUT $output{$chrom}{$pos};
202 }
203 }
204 }
205 close(OUT);
206 }
207
208
209 sub chromosome_order
210 {
211 my $input = shift;
212 # calculate flagstats
213 my $COMM = "samtools view -H $input | grep '^\@SQ'";
214 my @SQ = `$COMM`;
215 chomp @SQ;
216 for(my $i = 0; $i <= $#SQ; $i++)
217 {
218 $SQ[$i] =~ s/^\@SQ\tSN:(.*?)\tLN:\d+$/$1/;
219 }
220 return(@SQ);
221 }
222
223