annotate 2.4/script/Merge_SV.pl @ 16:8eb7d93f7e58 draft

Uploaded
author plus91-technologies-pvt-ltd
date Sat, 31 May 2014 11:23:36 -0400
parents e3609c8714fb
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
13
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
1 #!/usr/bin/perl -w
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
2 use Getopt::Long;
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
3 use List::Util qw(min max);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
4
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
5
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
#Declare variables
#   $window   - number of bases added on each side of a breakpoint position
#   $tmpSpace - base name of the scratch files (assigned further down)
#   $help     - set when -h/--help is given
#   $outFile  - name of the final merged table
#   @VCF      - the input VCF files given with -v
my ($window, $tmpSpace, $help, $outFile);
my @VCF;

# -v takes two or more file names; -o and -w take an optional string value.
my $options_ok = GetOptions(
    'v=s{2,}' => \@VCF,
    'o:s'     => \$outFile,
    'w:s'     => \$window,
    'h|help'  => \$help,
);

# GetOptions has already printed its own diagnostic for an unknown or
# malformed option; show the usage text and stop instead of carrying on
# with half-parsed settings.
if (!$options_ok) {
    &usage();
    exit 1;
}

# No input files, or help explicitly requested: print usage and leave.
if (!@VCF || $help) {
    &usage();
    exit;
}

# A missing, empty or zero window falls back to the default of 500.
if (!$window) {
    $window = 500;
}
elsif ($window !~ /\A\d+\z/) {
    # The window is used in arithmetic on every record, so reject
    # anything that is not a plain non-negative integer up front.
    die "Window size (-w) must be a non-negative integer, got '$window'\n";
}

# Default output file name when -o is absent or given without a value.
if (!$outFile) {
    $outFile = "merged.vcf.out";
}
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
###########################################
# Protect against merging too many results
###########################################
# The scratch file is opened in append mode below, so a file left behind
# by an earlier run has to be removed first.
$tmpSpace = 'temporarySV_merge';
if (-e $tmpSpace) {
    #Delete temp file if it exists
    unlink $tmpSpace;
}

###########################################
#For each VCF, create a BEDPE file
###########################################
# Every usable VCF record is written twice (breakpoint -> mate and
# mate -> breakpoint), each side widened by $window bases in both directions.
open(OUT, '>>', $tmpSpace) or die "Can't write in this directory\n";
for my $vcf_path (@VCF) {
    my $vcf_fh;
    if (!open($vcf_fh, '<', $vcf_path)) {
        my $reason = $!;    # keep errno before usage() does any I/O
        &usage();
        die "Can't open VCF file '$vcf_path': $reason\n";
    }

    while (my $record = <$vcf_fh>) {
        next if $record =~ /^#/;    # header / meta lines
        chomp $record;

        my @line = split(/\t/, $record);
        next if @line < 5;          # no ALT column, nothing to parse

        # The ALT column is reduced to "<chrom>:<pos>" by deleting the
        # square brackets and the capital letters A-L, N-W and Z.
        # NOTE(review): this also strips those capitals from the mate contig
        # name itself (e.g. "MT" becomes "M") -- confirm that is acceptable.
        my $mate = $line[4];
        $mate =~ s/[A-L]|[N-W]|[Z]|\[|\]//g;
        my ($mate_chr, $mate_pos) = split(/:/, $mate);
        next unless defined $mate_pos;    # ALT carries no mate position

        my $end1a = $line[1] - $window;
        my $end1b = $line[1] + $window;
        my $end2a = $mate_pos - $window;
        my $end2b = $mate_pos + $window;

        # Drop records whose window would start before the contig start.
        next if ($end1a < 0) || ($end2a < 0);
        # Drop records whose contig name is the bare string "chr".
        next if ($line[0] =~ /^chr$/) || ($mate_chr =~ /^chr$/);

        print OUT "$line[0]\t$end1a\t$end1b\t$mate_chr\t$end2a\t$end2b\n";
        print OUT "$mate_chr\t$end2a\t$end2b\t$line[0]\t$end1a\t$end1b\n";
    }
    close $vcf_fh;
}
close(OUT) or die "Can't finish writing $tmpSpace: $!\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
61
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
###########################################
#Now merge the BEDPE into a unique BEDPE
###########################################
#Make sure the BEDPE is sorted
# Sort by chrom1, start1, chrom2, start2 and drop rows with identical keys.
# The list form of system() runs sort directly, without a shell.
my $tmpSpace2 = join("", $tmpSpace, ".2");
system('sort', '-k1,1', '-k2,3n', '-k4,4', '-k5,5n', '-u',
       '-o', $tmpSpace2, $tmpSpace) == 0
    or die "Could not sort $tmpSpace (exit status $?)\n";
unlink($tmpSpace);

#Create output files for the left and right merged BEDPE
# Line N of the left file and line N of the right file together describe
# merged event N; later steps rely on both files having the same length.
my $tmpSpace3 = join("", $tmpSpace, ".3");
my $tmpSpace4 = join("", $tmpSpace, ".4");

open(my $left_out,  '>', $tmpSpace3) or die "Cant write in this directory\n";
open(my $right_out, '>', $tmpSpace4) or die "Cant write in this directory\n";
open(my $bedpe_in,  '<', $tmpSpace2) or die "$tmpSpace2 has already been deleted\n";

# Walk the sorted rows and keep one representative per cluster: the first
# row of a cluster is written out and becomes the anchor; following rows on
# the same chromosome pair whose first-side end and second-side start lie
# no more than $window past the anchor's are treated as the same event.
# NOTE(review): only an upper bound is tested on the second side; a row
# whose second-side start is far *below* the anchor's still joins the
# cluster -- confirm whether a lower bound is wanted as well.
my @anchor;    # (chr1, pos1, pos2, chr2, pos3, pos4) of the current cluster
while (my $row = <$bedpe_in>) {
    chomp $row;
    my ($chr1, $pos1, $pos2, $chr2, $pos3, $pos4) = split(/\t/, $row);
    next unless defined $pos4;    # skip short / malformed rows

    if (   @anchor
        && $chr1 eq $anchor[0]
        && $pos2 <= $anchor[2] + $window
        && $chr2 eq $anchor[3]
        && $pos3 <= $anchor[4] + $window)
    {
        next;    # same event as the anchor, already represented
    }

    # Make sure start <= end on both sides before writing.
    ($pos1, $pos2) = ($pos2, $pos1) if $pos1 > $pos2;
    ($pos3, $pos4) = ($pos4, $pos3) if $pos3 > $pos4;

    print {$left_out}  join("\t", $chr1, $pos1, $pos2) . "\n";
    print {$right_out} join("\t", $chr2, $pos3, $pos4) . "\n";

    @anchor = ($chr1, $pos1, $pos2, $chr2, $pos3, $pos4);
}
close $bedpe_in;
# Both files are read by external tools next, so they must be flushed
# and closed here.
close($left_out)  or die "Can't finish writing $tmpSpace3: $!\n";
close($right_out) or die "Can't finish writing $tmpSpace4: $!\n";
unlink($tmpSpace2);
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
116
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
#####################################################################
#Now find out for each Unique BEDPE, how many Samples was the SV in?
#####################################################################
#FOR EACH VCF
#get NAME

my $tmpSpace5 = join("", $tmpSpace, ".5");    # BEDPE rows of the current VCF
my $tmpSpace7 = join("", $tmpSpace, ".7");    # running result table
my $tmpSpace9 = join("", $tmpSpace, ".9");    # next version of the result table

#Create a placeholder file
# One row per merged event: the six coordinates, a sample count of 0 and
# the sample list "NA".
system("paste $tmpSpace3 $tmpSpace4| awk '{OFS=\"\\t\"}{print \$1,\$2,\$3,\$4,\$5,\$6,0,\"NA\"}' > $tmpSpace7") == 0
    or die "Could not build $tmpSpace7 from $tmpSpace3 and $tmpSpace4 (exit status $?)\n";

#Convert the VCF into a BED PE
for my $vcf_path (@VCF) {
    open(my $sample_out, '>', $tmpSpace5) or die "Cant write in this directory\n";
    open(my $vcf_in, '<', $vcf_path) or die "Can't open VCF file '$vcf_path': $!\n";
    print STDERR "Starting on $vcf_path\n";

    while (my $record = <$vcf_in>) {
        next if $record =~ /^#/;    # header / meta lines
        chomp $record;

        my @line = split(/\t/, $record);
        next if @line < 5;          # no ALT column, nothing to parse

        # Same ALT reduction as in the first pass: delete the square
        # brackets and the capital letters A-L, N-W and Z, then split
        # what is left on ':' into chromosome and position.
        my $mate = $line[4];
        $mate =~ s/[A-L]|[N-W]|[Z]|\[|\]//g;
        my ($mate_chr, $mate_pos) = split(/:/, $mate);
        next unless defined $mate_pos;    # ALT carries no mate position

        my $end1a = $line[1] - $window;
        my $end1b = $line[1] + $window;
        my $end2a = $mate_pos - $window;
        my $end2b = $mate_pos + $window;

        next if ($end1a < 0) || ($end2a < 0);
        next if ($line[0] =~ /^chr$/) || ($mate_chr =~ /^chr$/);

        print {$sample_out} "$line[0]\t$end1a\t$end1b\t$mate_chr\t$end2a\t$end2b\n";
        print {$sample_out} "$mate_chr\t$end2a\t$end2b\t$line[0]\t$end1a\t$end1b\n";
    }
    close $vcf_in;
    close($sample_out) or die "Can't finish writing $tmpSpace5: $!\n";

    #for each row in $tmpSpace3, count the number of overlaps on both sides
    # intersectBed is expected to write one line per input interval with the
    # overlap count appended as a 4th column.
    my $left  = join("", $tmpSpace, ".left");
    my $right = join("", $tmpSpace, ".right");
    system("intersectBed -a $tmpSpace3 -b $tmpSpace5 -loj -c > $left") == 0
        or die "intersectBed failed on $tmpSpace3 (exit status $?); is bedtools installed and in your path?\n";
    system("intersectBed -a $tmpSpace4 -b $tmpSpace5 -loj -c > $right") == 0
        or die "intersectBed failed on $tmpSpace4 (exit status $?); is bedtools installed and in your path?\n";

    # Read the result table and both count files in lock step and write the
    # updated table. Row N of all three files belongs to merged event N.
    open(my $left_in,   '<', $left)      or die "Cant find $left\n";
    open(my $right_in,  '<', $right)     or die "Cant find $right\n";
    open(my $table_in,  '<', $tmpSpace7) or die "Cant find $tmpSpace7\n";
    open(my $table_out, '>', $tmpSpace9) or die "Cant write in this directory\n";

    while (my $row = <$table_in>) {
        my $left_hit  = <$left_in>;
        my $right_hit = <$right_in>;
        # Fewer count lines than events means the files are out of step.
        unless (defined $left_hit && defined $right_hit) {
            die "Need to check for errors in $left and $right\n\n";
        }
        chomp($row, $left_hit, $right_hit);

        my @lines       = split(/\t/, $row);
        my $left_count  = (split(/\t/, $left_hit))[3];
        my $right_count = (split(/\t/, $right_hit))[3];
        $left_count  = 0 unless defined $left_count;
        $right_count = 0 unless defined $right_count;

        my ($NumSamples, $Samples) = @lines[6, 7];
        # The sample is credited only when BOTH sides of the event overlap
        # at least one of its intervals.
        if ($left_count > 0 && $right_count > 0) {
            $Samples = $Samples . ";" . $vcf_path;
            $Samples =~ s/^NA;//;    # drop the placeholder on the first hit
            $NumSamples = $NumSamples + 1;
        }
        print {$table_out} join("\t", @lines[0..5], $NumSamples, $Samples) . "\n";
    }
    # More count lines than events is just as wrong as fewer.
    if (!eof($left_in) || !eof($right_in)) {
        die "Need to check for errors in $left and $right\n\n";
    }

    close $left_in;
    close $right_in;
    close $table_in;
    close($table_out) or die "Can't finish writing $tmpSpace9: $!\n";
    unlink($left, $right);

    print STDERR "completed with $vcf_path\n";
    # The updated table becomes the input for the next sample.
    rename($tmpSpace9, $tmpSpace7)
        or die "Could not replace $tmpSpace7 with $tmpSpace9: $!\n";
}

# List form keeps the user-supplied output name away from the shell.
system('cp', '--', $tmpSpace7, $outFile) == 0
    or die "Could not copy results to $outFile (exit status $?)\n";
unlink($tmpSpace9, $tmpSpace7, $tmpSpace3, $tmpSpace4, $tmpSpace5);
print STDERR "Your results are in $outFile\n";
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
207
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
208
e3609c8714fb Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
# Print the help text (purpose, command line synopsis, bedtools
# requirement) to the currently selected output handle.
# Takes no arguments; the return value is whatever print returns.
sub usage() {
    my @help_lines = (
        '',
        '###',
        '### This script will merge multiple SoftSearch VCF files',
        '###',
        '',
        'Usage: Merge_SV.pl -v <vcf1> <vcf2> <vcfN> -w [500] -o <output file>',
        '',
        'Note: Must have bedtools installed and in your path',
        '',
        '',
        '',
    );
    # Joining on newline gives one leading newline and three trailing ones.
    print join("\n", @help_lines);
}