annotate ngsap-vc/svdetect/BAM_preprocessingPairs.pl @ 3:0d10255b5434 draft default tip

Uploaded
author scisjnu123
date Thu, 03 Oct 2019 10:42:15 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
1 #!/usr/bin/perl -w
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
2
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
3 use strict;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
4 use warnings;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
5 use Getopt::Std;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
6 my $version = '0.5_galaxy';
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
7
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
8 my $SAMTOOLS_BIN_DIR="/bioinfo/local/samtools";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
9
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
10 my %opts = ( t=>1, p=>1, n=>1000000, f=>3, s=>0, S=>10000, o=>"." );
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
11
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
12 getopts('dt:p:n:f:s:S:o:b:l:x:N:', \%opts); #GALAXY
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
13
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
14 my $working_dir=($opts{o} ne ".")? $opts{o}:"working directory";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
15
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
16 my $pt_bad_mates_file=$opts{b}; #GALAXY
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
17 my $pt_log_file=$opts{l}; #GALAXY
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
18 my $pt_good_mates_file=$opts{x} if($opts{d}); #GALAXY
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
19
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
20
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
21 die(qq/
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
22
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
23 Description:
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
24
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
25 Preprocessing of mates to get anomalously mapped mate-pair\/paired-end reads as input
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
26 for SVDetect.
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
27
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
28 From all pairs mapped onto the reference genome, this script outputs abnormal pairs:
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
29 - mapped on two different chromosomes
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
30 - with an incorrect strand orientation and\/or pair order
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
31 - with an insert size distance +- sigma threshold
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
32 into a file <prefix.ab.bam\/sam>
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
33
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
34 -BAM\/SAM File input format only, sorted by read names
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
35
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
36 Version : $version
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
37 SAMtools required for BAM files
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
38
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
39
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
40 Usage: BAM_preprocessingPairs.pl [options] <all_mate_file.sorted.bam\/sam>
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
41
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
42 Options: -t BOOLEAN read type: =1 (Illumina), =0 (SOLiD) [$opts{t}]
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
43 -p BOOLEAN pair type: =1 (paired-end), =0 (mate-pair) [$opts{p}]
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
44 -n INTEGER number of pairs for calculating mu and sigma lengths [$opts{n}]
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
45 -s INTEGER minimum value of ISIZE for calculating mu and sigma lengths [$opts{s}]
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
46 -S INTEGER maximum value of ISIZE for calculating mu and sigma lengths [$opts{S}]
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
47 -f REAL minimal number of sigma fold for filtering pairs [$opts{f}]
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
48 -d dump normal pairs into a file [<prefix.norm.bam\/sam>] (optional)
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
49 -o STRING output directory [$working_dir]
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
50
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
51 \n/) if (@ARGV == 0 && -t STDIN);
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
52
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
53 unless (-d $opts{o}){
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
54 mkdir $opts{o} or die;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
55 }
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
56 $opts{o}.="/" if($opts{o}!~/\/$/);
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
57
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
58 my $mates_file=shift(@ARGV);
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
59
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
60 $mates_file=readlink($mates_file);
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
61
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
62 my $bad_mates_file=(split(/\//,$mates_file))[$#_];
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
63
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
64 if($bad_mates_file=~/.(s|b)am$/){
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
65 $bad_mates_file=~s/.(b|s)am$/.ab.sam/;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
66 $bad_mates_file=$opts{o}.$bad_mates_file;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
67 }
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
68
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
69 else{
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
70 die "Error: mate_file with the extension <.bam> or <.sam> needed !\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
71 }
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
72
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
73 my $good_mates_file;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
74 if($opts{d}){
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
75 $good_mates_file=(split(/\//,$mates_file))[$#_];
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
76 $good_mates_file=~s/.(b|s)am$/.norm.sam/;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
77 $good_mates_file=$opts{o}.$good_mates_file;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
78 }
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
79
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
80 my $log_file=$opts{o}.$opts{N}.".svdetect_preprocessing.log"; #GALAXY
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
81
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
82 #------------------------------------------------------------------------------#
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
83 #Calculate mu and sigma
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
84
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
85 open LOG,">$log_file" or die "$0: can't open ".$log_file.":$!\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
86
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
87 print LOG "\# Calculating mu and sigma lengths...\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
88 print LOG "-- file=$mates_file\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
89 print LOG "-- n=$opts{n}\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
90 print LOG "-- ISIZE min=$opts{s}, max=$opts{S}\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
91
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
92 my ($record, $sumX,$sumX2) = (0,0,0);
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
93 my $warn=$opts{n}/10;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
94 my $prev_pair="FIRST";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
95
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
96 my $bam=($mates_file =~ /.bam$/)? 1:0;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
97
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
98 if($bam){
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
99 open(MATES, "${SAMTOOLS_BIN_DIR}/samtools view $mates_file |") or die "$0: can't open ".$mates_file.":$!\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
100 }else{
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
101 open MATES, "<".$mates_file or die "$0: can't open ".$mates_file.":$!\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
102 }
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
103
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
104 while(<MATES>){
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
105
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
106 my @t=split;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
107
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
108 next if ($t[0]=~/^@/);
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
109
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
110 my $current_pair=$t[0];
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
111 next if($current_pair eq $prev_pair);
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
112 $prev_pair=$current_pair;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
113
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
114 my ($chr1,$chr2,$length)=($t[2],$t[6],abs($t[8]));
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
115
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
116 next if (($t[1]&0x0004) || ($t[1]&0x0008));
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
117 next if ($length<$opts{s} || $length>$opts{S}) ;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
118
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
119 if($chr2 eq "="){
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
120
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
121 $sumX += $length; #add to sum and sum^2 for mean and variance calculation
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
122 $sumX2 += $length*$length;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
123 $record++;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
124 }
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
125
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
126 if($record>$warn){
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
127 print LOG "-- $warn pairs analysed\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
128 $warn+=$warn;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
129 }
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
130
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
131 last if ($record>$opts{n});
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
132
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
133 }
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
134 close (MATES);
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
135
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
136 $record--;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
137 my $mu = $sumX/$record;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
138 my $sigma = sqrt($sumX2/$record - $mu*$mu);
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
139
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
140 print LOG "-- Total : $record pairs analysed\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
141 print LOG "-- mu length = ".decimal($mu,1).", sigma length = ".decimal($sigma,1)."\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
142
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
143 #------------------------------------------------------------------------------#
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
144 #------------------------------------------------------------------------------#
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
145 #Preprocessing pairs
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
146
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
147 $warn=100000;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
148
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
149 $record=0;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
150 my %count=( ab=>0, norm=>0, chr=>0, sense=>0, dist=>0, unmap=>0);
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
151
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
152 my $read_type=($opts{t})? "Illumina":"SOLiD";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
153 my $pair_type=($opts{p})? "paired-end":"mate-paired";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
154
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
155 print LOG "\# Preprocessing pairs...\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
156 print LOG "-- file= $mates_file\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
157 print LOG "-- type= $read_type $pair_type reads\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
158 print LOG "-- sigma threshold= $opts{f}\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
159 print LOG "-- using ".decimal($mu-$opts{f}*$sigma,4)."-".decimal($mu+$opts{f}*$sigma,4)." as normal range of insert size\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
160
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
161 my @header;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
162
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
163 if($bam){
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
164 open(HEADER, "${SAMTOOLS_BIN_DIR}/samtools view -H $mates_file |") or die "$0: can't open ".$mates_file.":$!\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
165 @header=<HEADER>;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
166 close HEADER;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
167 open(MATES, "${SAMTOOLS_BIN_DIR}/samtools view $mates_file |") or die "$0: can't open ".$mates_file.":$!\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
168 }else{
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
169 open MATES, "<".$mates_file or die "$0: can't open ".$mates_file.":$!\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
170 }
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
171
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
172 open AB, ">$bad_mates_file" or die "$0: can't write in the output: $bad_mates_file :$!\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
173 print AB @header if($bam);
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
174
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
175 if($opts{d}){
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
176 open NORM, ">$good_mates_file" or die "$0: can't write in the output: $good_mates_file :$!\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
177 print NORM @header if($bam);
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
178 }
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
179
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
180 $prev_pair="FIRST";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
181 my $prev_bad;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
182
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
183 while(<MATES>){
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
184
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
185 my @t=split;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
186 my $bad=0;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
187
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
188 if ($t[0]=~/^@/){
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
189 print AB;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
190 print NORM if ($opts{d});
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
191 next;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
192 }
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
193
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
194 my $current_pair=$t[0];
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
195 if($current_pair eq $prev_pair){
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
196 next if($prev_bad==-1);
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
197 if($prev_bad){
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
198 print AB;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
199 }elsif(!$prev_bad){
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
200 print NORM if($opts{d});
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
201 }
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
202 next;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
203 }
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
204
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
205 $prev_pair=$current_pair;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
206
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
207 my ($chr1,$chr2,$pos1,$pos2,$length)=($t[2],$t[6],$t[3],$t[7], abs($t[8]));
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
208
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
209 if (($t[1]&0x0004) || ($t[1]&0x0008)){
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
210 $prev_bad=-1;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
211 $count{unmap}++;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
212 $record++;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
213 next;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
214
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
215 }
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
216
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
217 my $strand1 = (($t[1]&0x0010))? 'R':'F';
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
218 my $strand2 = (($t[1]&0x0020))? 'R':'F';
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
219 my $order1 = (($t[1]&0x0040))? '1':'2';
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
220 my $order2 = (($t[1]&0x0080))? '1':'2';
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
221
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
222 if($order1 == 2){
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
223 ($strand1,$strand2)=($strand2,$strand1);
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
224 ($chr1,$chr2)=($chr2,$chr1);
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
225 ($pos1,$pos2)=($pos2,$pos1);
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
226 ($order1,$order2)=($order2,$order1);
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
227 }
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
228
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
229 my $sense=$strand1.$strand2;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
230
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
231 if($chr1 ne "=" && $chr2 ne "="){
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
232 $bad=1;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
233 $count{chr}++;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
234 }
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
235
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
236 if($opts{p}){ #paired-end
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
237 if(!(($sense eq "FR" && $pos1<$pos2) || ($sense eq "RF" && $pos2<$pos1))){
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
238 $bad=1;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
239 $count{sense}++;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
240 }
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
241 }else{ #mate-pair
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
242 if($opts{t}){ #Illumina
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
243 if(!(($sense eq "FR" && $pos2<$pos1) || ($sense eq "RF" && $pos1<$pos2))){
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
244 $bad=1;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
245 $count{sense}++;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
246 }
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
247 }else{ #SOLiD
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
248 if(!(($sense eq "FF" && $pos2<$pos1) || ($sense eq "RR" && $pos1<$pos2))){
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
249 $bad=1;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
250 $count{sense}++;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
251 }
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
252 }
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
253 }
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
254
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
255 if(($chr1 eq "=" || $chr2 eq "=") && ($length <$mu - $opts{f}*$sigma || $length>$mu + $opts{f}*$sigma)){
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
256 $bad=1;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
257 $count{dist}++;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
258 }
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
259
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
260 if($bad){
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
261 print AB;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
262 $count{ab}++;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
263 $prev_bad=$bad;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
264 }else{
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
265 print NORM if ($opts{d});
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
266 $count{norm}++;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
267 $prev_bad=$bad;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
268 }
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
269
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
270 $record++;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
271
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
272 if($record>$warn){
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
273 print LOG "-- $warn pairs analysed\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
274 $warn+=100000;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
275 }
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
276 }
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
277
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
278 close AB;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
279 close NORM if($opts{d});
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
280
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
281 print LOG "-- Total : $record pairs analysed\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
282 print LOG "-- $count{unmap} pairs whose one or both reads are unmapped\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
283 print LOG "-- ".($count{ab}+$count{norm})." mapped pairs\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
284 print LOG "---- $count{ab} abnormal mapped pairs\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
285 print LOG "------ $count{chr} pairs mapped on two different chromosomes\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
286 print LOG "------ $count{sense} pairs with incorrect strand orientation and\/or pair order\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
287 print LOG "------ $count{dist} pairs with incorrect insert size distance\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
288 print LOG "--- $count{norm} correct mapped pairs\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
289
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
290 #------------------------------------------------------------------------------#
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
291 #------------------------------------------------------------------------------#
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
292 #OUTPUT
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
293
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
294 if($bam){
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
295
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
296 my $bam_file=$bad_mates_file;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
297 $bam_file=~s/.sam$/.bam/;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
298 print LOG "\# Converting sam to bam for abnormal mapped pairs\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
299 system("${SAMTOOLS_BIN_DIR}/samtools view -bS $bad_mates_file > $bam_file 2>".$opts{o}."samtools.log");
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
300 unlink($bad_mates_file);
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
301 print LOG "-- output created: $bam_file\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
302
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
303 system "rm $pt_bad_mates_file ; ln -s $bam_file $pt_bad_mates_file"; #GALAXY
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
304
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
305 if($opts{d}){
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
306 $bam_file=$good_mates_file;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
307 $bam_file=~s/.sam$/.bam/;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
308 print LOG "\# Converting sam to bam for correct mapped pairs\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
309 system("${SAMTOOLS_BIN_DIR}/samtools view -bS $good_mates_file > $bam_file 2>".$opts{o}."samtools.log");
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
310 unlink($good_mates_file);
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
311 print LOG "-- output created: $bam_file\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
312
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
313 system "rm $pt_good_mates_file ; ln -s $bam_file $pt_good_mates_file"; #GALAXY
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
314
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
315 }
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
316
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
317 }
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
318
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
319 else{
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
320 print LOG "-- output created: $bad_mates_file\n";
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
321 print LOG "-- output created: $good_mates_file\n" if($opts{d});
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
322 }
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
323
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
324 close LOG;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
325
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
326 system "rm $pt_log_file ; ln -s $log_file $pt_log_file"; #GALAXY
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
327
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
328
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
329 #------------------------------------------------------------------------------#
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
330 #------------------------------------------------------------------------------#
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
331 sub decimal{
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
332
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
333 my $num=shift;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
334 my $digs_to_cut=shift;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
335
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
336 $num=sprintf("%.".($digs_to_cut-1)."f", $num) if ($num=~/\d+\.(\d){$digs_to_cut,}/);
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
337
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
338 return $num;
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
339 }
0d10255b5434 Uploaded
scisjnu123
parents:
diff changeset
340 #------------------------------------------------------------------------------#