annotate SVDetect_run_parallel.pl @ 6:f6ccaaed3654 draft

Uploaded
author bzeitouni
date Mon, 11 Jun 2012 12:31:19 -0400
parents ba8c5e544948
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1 #!/usr/bin/perl -w
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3 =pod
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
4
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
5 =head1 NAME
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
6
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
7 SVDetect - Program designed for the detection of structural variations
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
8 from paired-end/mate-pair sequencing data, compatible with SOLiD and Illumina (>=1.3) reads
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
9
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
10 Version: 0.8 for Galaxy
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
11
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
12 =head1 SYNOPSIS
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
13
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
14 SVDetect <command> -conf <configuration_file> [-help] [-man]
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
15
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
16 Command:
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
17
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
18 linking detection and isolation of links
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
19 filtering filtering of links according to different parameters
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
20 links2circos links conversion to circos format
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
21 links2bed paired-ends of links converted to bed format (UCSC)
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
22 links2SV formatted output to show most significant SVs
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
23 cnv calculate copy-number profiles
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
24 ratio2circos ratio conversion to circos density format
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
25 ratio2bedgraph ratio conversion to bedGraph density format (UCSC)
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
26
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
27 =head1 DESCRIPTION
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
28
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
29 This is a command-line interface to SVDetect.
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
30
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
31
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
32 =head1 AUTHORS
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
33
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
34 Bruno Zeitouni E<lt>bruno.zeitouni@curie.frE<gt>,
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
35 Valentina Boeva E<lt>valentina.boeva@curie.frE<gt>
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
36
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
37 =cut
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
38
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
39 # -------------------------------------------------------------------
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
40
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
41 use strict;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
42 use warnings;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
43
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
44 use Pod::Usage;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
45 use Getopt::Long;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
46
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
47 use Config::General;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
48 use Tie::IxHash;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
49 use FileHandle;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
50 use Parallel::ForkManager;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
51
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
52 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
53 #PARSE THE COMMAND LINE
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Command-line options are collected into %OPT, keyed by option name.
# All options except -conf, -help and -man are marked as Galaxy additions:
# -out1 .. -out5, -l and -N take a string value each.
my %OPT;
GetOptions(\%OPT,
    'conf=s',
    'out1=s',    #GALAXY
    'out2=s',    #GALAXY
    'out3=s',    #GALAXY
    'out4=s',    #GALAXY
    'out5=s',    #GALAXY
    'l=s',       #GALAXY
    'N=s',       #GALAXY
    'help',      #GALAXY
    'man'
);

pod2usage() if $OPT{help};
pod2usage(-verbose => 2) if $OPT{man};

# No system call has failed at this point, so $! holds nothing useful;
# tell the user which mandatory option is missing instead.
pod2usage(-message => "Missing mandatory option: -conf <configuration_file>",
          -exitval => 2) if (!defined $OPT{conf});

# At least one command must remain on the command line after option parsing.
pod2usage() if (@ARGV < 1);

# Dispatch table: command name => handler. Tie::IxHash keeps the commands in
# the order in which they are declared here.
tie(my %func, 'Tie::IxHash',
    linking        => \&createlinks,
    filtering      => \&filterlinks,
    links2circos   => \&links2circos,
    links2bed      => \&links2bed,
    links2compare  => \&links2compare,
    links2SV       => \&links2SV,
    cnv            => \&cnv,
    ratio2circos   => \&ratio2circos,
    ratio2bedgraph => \&ratio2bedgraph,
);

# Reject the whole invocation up front if any requested command is unknown,
# so that no command runs before a later one turns out to be invalid.
foreach my $command (@ARGV) {
    pod2usage(-message => "Unknown command \"$command\"", -exitval => 2)
        if (!defined($func{$command}));
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
87 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
88
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
89
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
90 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
91 #READ THE CONFIGURATION FILE
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Load the configuration file into %CONF (section and option names are
# lower-cased, insertion order is preserved through Tie::IxHash).
my $conf = Config::General->new(
    -ConfigFile        => $OPT{conf},
    -Tie               => "Tie::IxHash",
    -AllowMultiOptions => 1,
    -LowerCaseNames    => 1,
    -AutoTrue          => 1,
);
my %CONF = $conf->getall;
validateconfiguration(\%CONF);    #validation of the configuration parameters

my $SAMTOOLS_BIN_DIR = "/bioinfo/local/samtools";    #GALAXY

# Galaxy output paths. "my $x = ... if (...)" has undefined behaviour in Perl,
# so each variable is declared unconditionally and left undef when the
# corresponding option is absent or empty (callers test with "defined").
my $pt_log_file    = $OPT{l};                              #GALAXY
my $pt_links_file  = $OPT{out1} ? $OPT{out1} : undef;      #GALAXY
my $pt_flinks_file = $OPT{out2} ? $OPT{out2} : undef;      #GALAXY
my $pt_sv_file     = $OPT{out3} ? $OPT{out3} : undef;      #GALAXY
my $pt_circos_file = $OPT{out4} ? $OPT{out4} : undef;      #GALAXY
my $pt_bed_file    = $OPT{out5} ? $OPT{out5} : undef;      #GALAXY

# Resolve the input files when they are symbolic links. readlink() returns
# undef for anything that is not a symlink; in that case the configured path
# is kept instead of being overwritten with undef.                   #GALAXY
foreach my $key (qw(mates_file cmap_file)) {
    next unless defined $CONF{general}{$key};
    my $target = readlink($CONF{general}{$key});
    $CONF{general}{$key} = $target if defined $target;
}

# Run log: <output_dir><N>.svdetect_run.log (-N may be omitted).     #GALAXY
my $log_name = defined $OPT{N} ? $OPT{N} : '';
my $log_file = $CONF{general}{output_dir} . $log_name . ".svdetect_run.log";

# LOG stays a bareword handle because the rest of the script prints to it.
open(LOG, '>', $log_file) or die "$0: can't open " . $log_file . ":$!\n";    #GALAXY
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
114 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
115
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
116 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
117 #COMMAND EXECUTION
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Run the requested commands in the order given on the command line.
foreach my $command (@ARGV) {
    $func{$command}->();
}
print LOG "-- end\n";    #GALAXY

# Buffered write errors only surface at close time, so report them.
close(LOG) or warn "$0: can't close " . $log_file . ":$!\n";    #GALAXY

# Expose the log under the path requested with -l by replacing that path with
# a symbolic link to the real log file. Skipped when -l was not given; done
# with unlink/symlink so the paths never pass through a shell.     #GALAXY
if (defined $pt_log_file && length $pt_log_file) {
    unlink($pt_log_file);
    symlink($log_file, $pt_log_file)
        or warn "$0: can't link " . $pt_log_file . " to " . $log_file . ":$!\n";
}
exit(0);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
126 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
127
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
128
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
129 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
130 #FUNCTIONS
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
131 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
132 #MAIN FUNCTION number 1: Detection of links from mate-pairs data
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Handler of the "linking" command.
#
# Takes no arguments; everything is read from the file-level %CONF hash
# (sections "general" and "detection"). Steps:
#   1. build the chromosome tables with shearingChromosome()
#   2. split the mate file with splitMateFile(), or reuse previously split
#      files found under <tmp_dir>mates/ when split_mate_file is false
#   3. run getlinks() on every split file, in up to num_threads child processes
#   4. concatenate the resulting <tmp_dir>links/*links files into
#      <output_dir><mates file name>.<sv_type>.links
#   5. when -out1 was given, replace that path with a symlink to the result
# Progress messages go to the LOG handle. Dies when no split mate file or no
# links file can be found.
sub createlinks {

    my %CHR;      #main hash table 1: fragments, links
    my %CHRID;
    my @MATEFILES;

    # All prefixes share the base name "<mates file name>.<sv_type>".
    my $output_prefix = $CONF{general}{mates_file} . "." . $CONF{general}{sv_type};
    my @path = split(/\//, $output_prefix);
    $output_prefix = $CONF{general}{output_dir} . $path[-1];
    my $tmp_mates_prefix = $CONF{general}{tmp_dir} . "mates/" . $path[-1];
    my $tmp_links_prefix = $CONF{general}{tmp_dir} . "links/" . $path[-1];

    shearingChromosome(\%CHR, \%CHRID,    #making the genomic fragment library with the detection parameters
                       $CONF{detection}{window_size},
                       $CONF{detection}{step_length},
                       $CONF{general}{cmap_file});

    if ($CONF{detection}{split_mate_file}) {

        splitMateFile(\%CHR, \%CHRID, \@MATEFILES, $tmp_mates_prefix,
                      $CONF{general}{sv_type},
                      $CONF{general}{mates_file},
                      $CONF{general}{input_format},
                      $CONF{general}{read_lengths}
                      );
    }
    else {

        # "ls" is kept on purpose: its ordering determines the order in which
        # the per-chromosome results are merged further down.
        @MATEFILES = qx{ls $tmp_mates_prefix*} or die "# Error: No splitted mate files already created at $CONF{general}{tmp_dir} :$!";
        chomp(@MATEFILES);
        print LOG "# Splitted mate files already created.\n";
    }

    #Parallelization of the linking per chromosome for intra + interchrs
    my $pm = Parallel::ForkManager->new($CONF{general}{num_threads});

    foreach my $matefile (@MATEFILES) {

        # start() is true in the parent, which moves on to the next file;
        # the child falls through, does the work and leaves via finish().
        $pm->start and next;
        getlinks(\%CHR, \%CHRID, $matefile);
        $pm->finish;
    }
    $pm->wait_all_children;

    #Merge the chromosome links file into only one
    my @LINKFILES = qx{ls $tmp_links_prefix*links} or die "# Error: No links files created at $CONF{general}{tmp_dir} :$!";
    chomp(@LINKFILES);
    catFiles( \@LINKFILES => "$output_prefix.links" );

    # Replace the Galaxy dataset path by a symlink to the merged file.
    # unlink/symlink are used instead of "rm; ln -s" so that the paths are
    # never interpreted by a shell.                                   #GALAXY
    if (defined $pt_links_file) {
        unlink($pt_links_file);
        symlink("$output_prefix.links", $pt_links_file)
            or warn "$0: can't link $pt_links_file to $output_prefix.links:$!\n";
    }
    print LOG "# Linking end procedure : output created: $output_prefix.links\n";
    #unlink(@LINKFILES);
    #unlink(@MATEFILES);

    undef %CHR;
    undef %CHRID;

}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
192 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
193 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Builds the links file for one split mate file.
#
# Arguments:
#   $chr_table   reference to the chromosome hash, handed through to
#                linking() and defineCoordsLinks()
#   $chr_ids     reference to the chromosome ID hash, handed through likewise
#   $mates_path  path of the split mate file to process
#
# Every file written here is named after $mates_path with the first "/mates/"
# path component replaced by "/links/". The final result is "<prefix>.links";
# the four intermediate files are deleted before returning.
sub getlinks {

    my ($chr_table, $chr_ids, $mates_path) = @_;

    (my $links_base = $mates_path) =~ s{/mates/}{/links/};

    # Files produced by the successive stages of the pipeline below
    my $mapped_file  = "$links_base.links.mapped";
    my $unique_file  = "$links_base.links.unique";
    my $defined_file = "$links_base.links.unique_defined";
    my $sorted_file  = "$links_base.links.sorted";
    my $final_file   = "$links_base.links";

    my %PAIR;    #main hash table 2: pairs

    #creation of all links from chromosome coordinates of pairs
    linking($chr_table, $chr_ids, \%PAIR,
            $CONF{general}{read_lengths},
            $CONF{detection}{window_size},
            $CONF{detection}{step_length},
            $mates_path,
            $CONF{general}{input_format},
            $CONF{general}{sv_type},
            $mapped_file);

    #remove the duplicates
    getUniqueLinks($mapped_file, $unique_file);

    #definition of the precise coordinates of links
    defineCoordsLinks($chr_table, $chr_ids, \%PAIR,
                      $CONF{general}{input_format},
                      $CONF{general}{sv_type},
                      $CONF{general}{read_lengths},
                      $unique_file,
                      $defined_file);

    #sorting links from coordinates
    sortLinks($defined_file, $sorted_file);

    #remove redundant links; this writes the final output file
    removeFullyOverlappedLinks($sorted_file, $final_file, 1);

    undef %PAIR;

    # Only the final "<prefix>.links" file is kept
    unlink($mapped_file, $unique_file, $defined_file, $sorted_file);
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
237 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
238 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Splits a mate file into one file per chromosome (intra-chromosomal pairs)
# plus one file for inter-chromosomal pairs, so that they can be processed in
# parallel.
#
# Arguments:
#   $chr           hash ref; $chr->{nb_chrs} and $chr->{<k>}->{name} are read
#   $chrID         hash ref; looked up with the chromosome names returned by
#                  readMateFile(). Its values are used as keys of the
#                  per-chromosome handles, so they presumably are the
#                  1..nb_chrs indices of $chr -- TODO confirm
#   $files_list    array ref; the name of every file created is pushed onto it
#   $output_prefix prefix of the files to create:
#                  "<prefix>.interchrs" and "<prefix>.<chromosome name>"
#   $sv_type       "all", "inter" or "intra": selects which files are written
#   $mates_file    input file; "*.gz" is read through gunzip, "*.bam" through
#                  samtools view, anything else as plain text
#   $input_format  passed through to readMateFile()
#   $tag_length    passed through to readMateFile()
#
# Dies when the input or one of the output files cannot be opened. Problems
# reported when closing a handle are only warned about.
sub splitMateFile{

    my ($chr,$chrID,$files_list,$output_prefix,$sv_type,$mates_file,$input_format,$tag_length)=@_;

    print LOG "# Splitting the mate file \"$mates_file\" for parallel processing...\n";

    my %filesHandle;

    # $sv_type does not change while reading, so test it only once
    my $want_inter = ($sv_type =~ /^(all|inter)$/) ? 1 : 0;
    my $want_intra = ($sv_type =~ /^(all|intra)$/) ? 1 : 0;

    #inter-chromosomal mate file
    if ($want_inter) {
        my $newFileName = "$output_prefix.interchrs";
        push(@{$files_list}, $newFileName);
        # Fail early: printing to a handle that never opened loses every pair
        my $fh = FileHandle->new($newFileName, "w")
            or die "$0: can't open ".$newFileName.":$!\n";
        $filesHandle{inter} = $fh;
    }

    #intra-chromosomal mate files, one per chromosome
    if ($want_intra) {
        foreach my $k (1..$chr->{nb_chrs}) {
            my $newFileName = $output_prefix.".".$chr->{$k}->{name};
            push(@{$files_list}, $newFileName);
            my $fh = FileHandle->new($newFileName, "w")
                or die "$0: can't open ".$newFileName.":$!\n";
            $filesHandle{$k} = $fh;
        }
    }

    # List-form pipe opens hand the file name to the program directly, without
    # a shell in between. The dots of the extensions are escaped so that only
    # real ".gz" / ".bam" suffixes select a decoder.
    my $mates_fh;
    if ($mates_file =~ /\.gz$/) {
        open($mates_fh, '-|', 'gunzip', '-c', $mates_file)
            or die "$0: can't open ".$mates_file.":$!\n";    #gzcat
    }
    elsif ($mates_file =~ /\.bam$/) {
        open($mates_fh, '-|', "$SAMTOOLS_BIN_DIR/samtools", 'view', $mates_file)
            or die "$0: can't open ".$mates_file.":$!\n";    #GALAXY
    }
    else {
        open($mates_fh, '<', $mates_file)
            or die "$0: can't open ".$mates_file.":$!\n";
    }

    # The current line is deliberately left in $_, as in the original loop:
    # readMateFile() is defined elsewhere and is only handed the split fields.
    while (<$mates_fh>) {

        my @t = split;
        my ($chr_read1, $chr_read2, $firstbase_read1, $firstbase_read2, $end_order_read1, $end_order_read2);

        next if (!readMateFile(\$chr_read1, \$chr_read2, \$firstbase_read1, \$firstbase_read2, \$end_order_read1, \$end_order_read2, \@t, $input_format, $tag_length));

        # Skip pairs with an end on a chromosome missing from the table
        next unless (exists $chrID->{$chr_read1} && exists $chrID->{$chr_read2});

        ($chr_read1, $chr_read2) = ($chrID->{$chr_read1}, $chrID->{$chr_read2});

        if ($want_inter && ($chr_read1 ne $chr_read2)) {
            my $fh2print = $filesHandle{inter};
            print $fh2print join("\t",@t)."\n";
        }

        if ($want_intra && ($chr_read1 eq $chr_read2)) {
            my $fh2print = $filesHandle{$chr_read1};
            print $fh2print join("\t",@t)."\n";
        }
    }

    # For a pipe, close() also reports a non-zero exit of gunzip/samtools
    close($mates_fh)
        or warn "$0: problem while closing ".$mates_file.": ".($! ? $! : "exit status $?")."\n";

    # Buffered write errors (e.g. disk full) only show up at close time
    foreach my $name (keys %filesHandle) {
        my $fh = $filesHandle{$name};
        $fh->close or warn "$0: can't close split mate file \"$name\":$!\n";
    }

    print LOG "# Splitted mate files of \"$mates_file\" created.\n";
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
306
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
307
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
308 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
309 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Handler of the "filtering" command.
#
# Takes no arguments; everything is read from the file-level %CONF hash
# (sections "general" and "filtering"). Steps:
#   1. build the chromosome tables with createChrHashTables()
#   2. split <output_dir><mates file name>.<sv_type>.links with
#      splitLinkFile(), or reuse the <tmp_dir>links/*links files already
#      present when split_link_file is false
#   3. run getFilteredlinks() on every split file, in up to num_threads
#      child processes
#   4. concatenate the <tmp_dir>links/*filtered files into
#      <output_dir><mates file name>.<sv_type>.links.filtered
#   5. when -out2 was given, replace that path with a symlink to the result
# Progress messages go to the LOG handle. Dies when no split link file or no
# filtered file can be found.
sub filterlinks {

    my %CHR;
    my %CHRID;
    my @LINKFILES;
    my @FLINKFILES;

    # All prefixes share the base name "<mates file name>.<sv_type>".
    my $output_prefix = $CONF{general}{mates_file} . "." . $CONF{general}{sv_type};
    my @path = split(/\//, $output_prefix);
    $output_prefix = $CONF{general}{output_dir} . $path[-1];
    my $tmp_links_prefix = $CONF{general}{tmp_dir} . "links/" . $path[-1];

    createChrHashTables(\%CHR, \%CHRID,
                        $CONF{general}{cmap_file});

    if ($CONF{filtering}{split_link_file}) {

        splitLinkFile(\%CHR, \%CHRID, \@LINKFILES,
                      $tmp_links_prefix,
                      $CONF{general}{sv_type},
                      "$output_prefix.links",
                      );
    }
    else {

        # "ls" is kept on purpose: its ordering determines the order in which
        # the per-chromosome files are processed.
        @LINKFILES = qx{ls $tmp_links_prefix*links} or die "# Error: No splitted link files already created\n";
        chomp(@LINKFILES);
        print LOG "# Splitted link files already created.\n";
    }

    #Parallelization of the filtering per chromosome for intra + interchrs
    my $pm = Parallel::ForkManager->new($CONF{general}{num_threads});

    foreach my $linkfile (@LINKFILES) {

        # start() is true in the parent, which moves on to the next file;
        # the child falls through, does the work and leaves via finish().
        $pm->start and next;
        getFilteredlinks(\%CHR, \%CHRID, $linkfile);
        $pm->finish;
    }
    $pm->wait_all_children;

    #Merge the chromosome links file into only one
    @FLINKFILES = qx{ls $tmp_links_prefix*filtered} or die "# Error: No links files created\n";
    chomp(@FLINKFILES);
    catFiles( \@FLINKFILES => "$output_prefix.links.filtered" );

    # Replace the Galaxy dataset path by a symlink to the merged file.
    # unlink/symlink are used instead of "rm; ln -s" so that the paths are
    # never interpreted by a shell.                                   #GALAXY
    if (defined $pt_flinks_file) {
        unlink($pt_flinks_file);
        symlink("$output_prefix.links.filtered", $pt_flinks_file)
            or warn "$0: can't link $pt_flinks_file to $output_prefix.links.filtered:$!\n";
    }
    print LOG"# Filtering end procedure : output created: $output_prefix.links.filtered\n";

    undef %CHR;
    undef %CHRID;

}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
363 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
364 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
#Split a links file so that each piece can be filtered by a separate process:
#one file for the inter-chromosomal links and/or one file per chromosome for
#the intra-chromosomal links.
#
#Arguments:
#  $chr          - hash ref; {nb_chrs} and {<chromosome ID>}{name} are read
#  $chrID        - hash ref: chromosome name => chromosome ID
#  $files_list   - array ref; the name of every created file is pushed onto it
#                  (inter-chromosomal file first, then one file per chromosome)
#  $input_prefix - prefix of the created files
#  $sv_type      - "all", "inter" or "intra": kind of links to keep
#  $link_file    - links file to split; whitespace-separated columns, the
#                  chromosome names being read from columns 1 and 4
#
#Lines that are blank, have fewer than 4 columns, or name a chromosome absent
#from $chrID are dropped. Kept lines are rewritten tab-separated.
#Dies if the input cannot be opened or if an output file cannot be created
#or closed.
sub splitLinkFile{

    my ($chr,$chrID,$files_list,$input_prefix,$sv_type,$link_file)=@_;

    print LOG "# Splitting the link file for parallel processing...\n";

    my $keep_inter=($sv_type=~/^(?:all|inter)$/);
    my $keep_intra=($sv_type=~/^(?:all|intra)$/);

    my %filesHandle;   #"inter" or chromosome ID => output file handle
    my %fileName;      #same keys => file name, for the error messages

    #registers one output file in $files_list and opens it for writing
    my $openOutput=sub{
        my ($key,$newFileName)=@_;
        push(@{$files_list},$newFileName);
        open(my $fh,'>',$newFileName) or die "$0: can't create ".$newFileName.":$!\n";
        $filesHandle{$key}=$fh;
        $fileName{$key}=$newFileName;
    };

    #inter-chromosomal links file
    $openOutput->("inter","$input_prefix.interchrs.links") if($keep_inter);

    #intra-chromosomal links files, one per chromosome
    if($keep_intra){
        foreach my $k (1..$chr->{nb_chrs}){
            $openOutput->($k,$input_prefix.".".$chr->{$k}->{name}.".links");
        }
    }

    open(my $links_fh,'<',$link_file) or die "$0: can't open ".$link_file.":$!\n";
    while(my $line=<$links_fh>){

        my @t=split(' ',$line);
        next if(@t<4);      #blank or truncated line: no chromosome pair to read

        my ($chr_read1,$chr_read2)=@t[0,3];

        next unless (exists $chrID->{$chr_read1} && exists $chrID->{$chr_read2});

        #from here on the two variables hold chromosome IDs, not names
        ($chr_read1,$chr_read2)=($chrID->{$chr_read1},$chrID->{$chr_read2});

        if($chr_read1 ne $chr_read2){
            print {$filesHandle{inter}} join("\t",@t)."\n" if($keep_inter);
        }
        elsif($keep_intra){
            #only the IDs 1..nb_chrs got an output file above
            my $fh2print=$filesHandle{$chr_read1};
            die "$0: no output file for chromosome ID ".$chr_read1." (IDs are expected to be 1..".$chr->{nb_chrs}.")\n"
                unless(defined $fh2print);
            print {$fh2print} join("\t",@t)."\n";
        }
    }
    close($links_fh);

    #buffered write errors only show up when the handle is closed
    foreach my $key (keys %filesHandle){
        close($filesHandle{$key}) or die "$0: can't close ".$fileName{$key}.":$!\n";
    }

    print LOG "# Splitted link files created.\n";
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
422
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
423
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
424 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
425 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
426 #MAIN FUNCTION number 2: Filtering processing
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
#Filtering pipeline applied to one links file.
#
#Arguments:
#  $chr, $chrID      - chromosome hash refs, handed over to the filtering steps
#  $tmp_links_prefix - path of the links file to filter; the result is written
#                      to "<path>.filtered" and every intermediate file is
#                      named "<path>.filtered_<step>"
#
#strandFiltering() always runs. When $CONF{filtering}{strand_filtering} is set,
#its output goes through getUniqueLinks(), optionally orderFiltering() and
#addInsertionInfo(), then sortLinks(), removeFullyOverlappedLinks() and
#postFiltering(); the intermediate files are deleted at the end.
#Dies if an intermediate file cannot be renamed: the next step would otherwise
#work on a missing or stale file.
sub getFilteredlinks {

    my ($chr,$chrID,$tmp_links_prefix)=@_;

    my $filtered="$tmp_links_prefix.filtered";

    strandFiltering($chr,$chrID,
                    $CONF{filtering}{nb_pairs_threshold},        #filtering of links
                    $CONF{filtering}{strand_filtering},
                    $CONF{filtering}{chromosomes},
                    $CONF{general}{input_format},
                    $CONF{general}{cmap_file},
                    $CONF{general}{mates_orientation},
                    $CONF{general}{read_lengths},
                    $tmp_links_prefix,
                    $filtered,
                    );

    if($CONF{filtering}{strand_filtering}){       #re-definition of links coordinates with strand filtering

        my @tmpfiles;

        #each step below reads the previous result under a step-specific name
        #and writes its own result back to "$filtered"
        my $unique="$tmp_links_prefix.filtered_unique";
        rename($filtered,$unique) or die "$0: can't rename $filtered to $unique:$!\n";

        getUniqueLinks($unique,$filtered);

        push(@tmpfiles,$unique);

        if($CONF{filtering}{order_filtering}){     #filtering using the order

            my $ordered="$tmp_links_prefix.filtered_ordered";
            rename($filtered,$ordered) or die "$0: can't rename $filtered to $ordered:$!\n";

            orderFiltering($chr,$chrID,
                           $CONF{filtering}{nb_pairs_threshold},
                           $CONF{filtering}{nb_pairs_order_threshold},
                           $CONF{filtering}{mu_length},
                           $CONF{filtering}{sigma_length},
                           $CONF{general}{mates_orientation},
                           $CONF{general}{read_lengths},
                           $ordered,
                           $filtered,
                           );

            push(@tmpfiles,$ordered);
        }

        if (($CONF{filtering}{insert_size_filtering})&&
            ($CONF{general}{sv_type} ne 'inter')){

            my $withoutIndelSize="$tmp_links_prefix.filtered_withoutIndelSize";
            rename($filtered,$withoutIndelSize) or die "$0: can't rename $filtered to $withoutIndelSize:$!\n";

            addInsertionInfo($chr,$chrID,
                             $CONF{filtering}{nb_pairs_threshold},
                             $CONF{filtering}{order_filtering},
                             $CONF{filtering}{indel_sigma_threshold},
                             $CONF{filtering}{dup_sigma_threshold},
                             $CONF{filtering}{singleton_sigma_threshold},
                             $CONF{filtering}{mu_length},
                             $CONF{filtering}{sigma_length},
                             $CONF{general}{mates_orientation},
                             $CONF{general}{read_lengths},
                             $withoutIndelSize,
                             $filtered
                             );

            push(@tmpfiles,$withoutIndelSize);
        }

        my $sorted="$tmp_links_prefix.filtered_sorted";
        my $nodup="$tmp_links_prefix.filtered_nodup";

        sortLinks($filtered,$sorted);

        removeFullyOverlappedLinks($sorted,$nodup);

        postFiltering($nodup,
                      $filtered,
                      $CONF{filtering}{final_score_threshold});

        push(@tmpfiles,$sorted,$nodup);

        unlink(@tmpfiles);
    }

    return;
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
515 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
516 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
517 #MAIN FUNCTION number 3: Circos format conversion for links
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
#Converts the filtered links file to the Circos format.
#Input : "<output_dir><base name of mates_file>.<sv_type>.links.filtered"
#Output: the input name suffixed with ".segdup.txt", written by links2segdup()
#When $pt_circos_file is defined (Galaxy), that path is replaced by a symbolic
#link pointing to the output file.
sub links2circos{

    my $input_file=$CONF{general}{mates_file}.".".$CONF{general}{sv_type}.".links.filtered";
    my @path=split(/\//,$input_file);
    $input_file=$CONF{general}{output_dir}.$path[$#path];   #keep the base name only

    my $output_file=$input_file.".segdup.txt";

    links2segdup($CONF{circos}{organism_id},
                 $CONF{circos}{colorcode},
                 $input_file,
                 $output_file);                     #circos file output

    if(defined $pt_circos_file){                    #GALAXY
        #built-ins rather than "rm ...; ln -s ..." through a shell: the paths
        #are used verbatim (spaces, metacharacters) and a failure is reported
        unlink($pt_circos_file) if(-e $pt_circos_file || -l $pt_circos_file);
        symlink($output_file,$pt_circos_file)
            or warn "$0: can't link $pt_circos_file to $output_file:$!\n";
    }
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
533 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
534 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
535 #MAIN FUNCTION number 4: Bed format conversion for links
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
#Converts the filtered links file to the bed format.
#Input : "<output_dir><base name of mates_file>.<sv_type>.links.filtered"
#Output: the input name suffixed with ".bed.txt", written by links2bedfile()
#When $pt_bed_file is defined (Galaxy), that path is replaced by a symbolic
#link pointing to the output file.
sub links2bed{

    my $input_file=$CONF{general}{mates_file}.".".$CONF{general}{sv_type}.".links.filtered";
    my @path=split(/\//,$input_file);
    $input_file=$CONF{general}{output_dir}.$path[$#path];   #keep the base name only

    my $output_file=$input_file.".bed.txt";

    links2bedfile($CONF{general}{read_lengths},
                  $CONF{bed}{colorcode},
                  $input_file,
                  $output_file);                    #bed file output

    if(defined $pt_bed_file){                       #GALAXY
        #built-ins rather than "rm ...; ln -s ..." through a shell: the paths
        #are used verbatim (spaces, metacharacters) and a failure is reported
        unlink($pt_bed_file) if(-e $pt_bed_file || -l $pt_bed_file);
        symlink($output_file,$pt_bed_file)
            or warn "$0: can't link $pt_bed_file to $output_file:$!\n";
    }
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
552 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
553 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
554 #MAIN FUNCTION number 6: SV report format conversion for links (*.sv.txt output)
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
#Converts the filtered links file to the ".sv.txt" report.
#Input : "<output_dir><base name of mates_file>.<sv_type>.links.filtered"
#Output: the input name suffixed with ".sv.txt", written by links2SVfile()
#When $pt_sv_file is defined (Galaxy), that path is replaced by a symbolic
#link pointing to the output file.
sub links2SV{

    my $input_file=$CONF{general}{mates_file}.".".$CONF{general}{sv_type}.".links.filtered";

    my @path=split(/\//,$input_file);
    $input_file=$CONF{general}{output_dir}.$path[$#path];   #keep the base name only

    my $output_file=$input_file.".sv.txt";

    links2SVfile( $input_file,
                  $output_file);

    if(defined $pt_sv_file){                        #GALAXY
        #built-ins rather than "rm ...; ln -s ..." through a shell: the paths
        #are used verbatim (spaces, metacharacters) and a failure is reported
        unlink($pt_sv_file) if(-e $pt_sv_file || -l $pt_sv_file);
        symlink($output_file,$pt_sv_file)
            or warn "$0: can't link $pt_sv_file to $output_file:$!\n";
    }
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
570 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
571 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
572 #MAIN FUNCTION number 7: copy number variations, coverage ratio calculation
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
#Coverage-ratio (copy number variation) calculation between the sample mate
#file ($CONF{general}{mates_file}) and the reference mate file
#($CONF{detection}{mates_file_ref}).
#Steps: build the fragment library, split both mate files per chromosome (or
#list the files left by a previous split), run densityCalculation() once per
#sample file in forked processes, then concatenate the density files into
#"<output_dir><base name of mates_file>.density".
sub cnv{

    my (%CHR,%CHRID);
    my (@MATEFILES,@MATEFILES_REF);

    #base names of the sample and reference mate files
    my @path=split(/\//,$CONF{general}{mates_file});
    my @path_ref=split(/\//,$CONF{detection}{mates_file_ref});

    my $output_prefix=$CONF{general}{output_dir}.$path[-1];
    my $output_prefix_ref=$CONF{general}{output_dir}.$path_ref[-1];   #not used further in this sub

    my $tmp_dir=$CONF{general}{tmp_dir};
    my $tmp_mates_prefix=$tmp_dir."mates/".$path[-1];
    my $tmp_mates_prefix_ref=$tmp_dir."mates/".$path_ref[-1];
    my $tmp_density_prefix=$tmp_dir."density/".$path[-1];

    #making the genomic fragment library with the detection parameters
    shearingChromosome(\%CHR,\%CHRID,
                       $CONF{detection}{window_size},
                       $CONF{detection}{step_length},
                       $CONF{general}{cmap_file});

    unless($CONF{detection}{split_mate_file}){

        #reuse the per-chromosome files of a previous run
        @MATEFILES=qx{ls $tmp_mates_prefix*} or die "# Error: No splitted sample mate files of \"$CONF{general}{mates_file}\" already created at $CONF{general}{tmp_dir} :$!";
        chomp(@MATEFILES);
        @MATEFILES_REF=qx{ls $tmp_mates_prefix_ref*} or die "# Error: No splitted reference mate files of \"$CONF{detection}{mates_file_ref}\" already created at $CONF{general}{tmp_dir} :$!";
        chomp(@MATEFILES_REF);
        print LOG "# Splitted sample and reference mate files already created.\n";

    }else{

        #sample first, then reference
        splitMateFile(\%CHR,\%CHRID,\@MATEFILES,$tmp_mates_prefix,
                      "intra",
                      $CONF{general}{mates_file},
                      $CONF{general}{input_format},
                      $CONF{general}{read_lengths}
                      );

        splitMateFile(\%CHR,\%CHRID,\@MATEFILES_REF,$tmp_mates_prefix_ref,
                      "intra",
                      $CONF{detection}{mates_file_ref},
                      $CONF{general}{input_format},
                      $CONF{general}{read_lengths}
                      );
    }

    #Parallelization of the cnv per chromosome
    my $pm=Parallel::ForkManager->new($CONF{general}{num_threads});

    foreach my $file (0..$#MATEFILES){

        #the parent gets a true value from start() and moves on to the next
        #file; the child falls through, does the work and exits in finish()
        $pm->start and next;

        densityCalculation(\%CHR,\%CHRID,$file,
                           $CONF{general}{read_lengths},
                           $CONF{detection}{window_size},
                           $CONF{detection}{step_length},
                           \@MATEFILES,
                           \@MATEFILES_REF,
                           $MATEFILES[$file].".density",
                           $CONF{general}{input_format});

        $pm->finish;
    }
    $pm->wait_all_children;

    #Merge the per-chromosome density files into only one
    my @DENSITYFILES=qx{ls $tmp_density_prefix*density} or die "# Error: No density files created at $CONF{general}{tmp_dir} :$!";
    chomp(@DENSITYFILES);
    catFiles( \@DENSITYFILES => "$output_prefix.density" );

    print LOG "# cnv end procedure : output created: $output_prefix.density\n";

    undef %CHR;
    undef %CHRID;

}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
654 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
655 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
656 #MAIN FUNCTION number 8: Circos format conversion for cnv ratios
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
#Converts the density (cnv ratio) file to the Circos format.
#Input : "<output_dir><base name of mates_file>.density"
#Output: the input name suffixed with ".segdup.txt", written by ratio2segdup()
sub ratio2circos{

    #only the base name of the mates file is kept; the file lives in output_dir
    my $density_name=(split(/\//,$CONF{general}{mates_file}.".density"))[-1];

    my $input_file=$CONF{general}{output_dir}.$density_name;
    my $output_file=$input_file.".segdup.txt";

    return ratio2segdup($CONF{circos}{organism_id},$input_file,$output_file);
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
670 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
671 #MAIN FUNCTION number 9: BedGraph format conversion for cnv ratios
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
#Converts the density (cnv ratio) file to the BedGraph format.
#Input : "<output_dir><base name of mates_file>.density"
#Output: the input name suffixed with ".bedgraph.txt", written by ratio2bedfile()
sub ratio2bedgraph{

    #only the base name of the mates file is kept; the file lives in output_dir
    my $density_name=(split(/\//,$CONF{general}{mates_file}.".density"))[-1];

    my $input_file=$CONF{general}{output_dir}.$density_name;
    my $output_file=$input_file.".bedgraph.txt";

    return ratio2bedfile($input_file,$output_file);      #bed file output
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
684 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
685 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
686 #Creation of the fragment library
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
#Creation of the fragment library: every chromosome of the cmap file is cut
#into windows of $window bases, a new window starting every $step bases.
#
#Arguments:
#  $chr, $chrID - hash refs filled by createChrHashTables(); in addition, for
#                 each chromosome ID $k:
#                   $chr->{$k}->{<n>}     = [start,end] of fragment number n
#                                           (numbered from 1, 0-based coordinates)
#                   $chr->{$k}->{nb_frag} = number of fragments created
#  $window      - window size (assumed to be a positive number; not checked)
#  $step        - step length between two window starts, must be > 0
#  $cmap_file   - chromosome map file, handed over to createChrHashTables()
#
#The last fragment of a chromosome is truncated at the chromosome end.
#Dies if $step is not a positive number (the loop would never end).
sub shearingChromosome{

    print LOG "# Making the fragments library...\n";

    my ($chr,$chrID,$window,$step,$cmap_file)=@_;       #window and step sizes parameters

    die "$0: the step length must be a positive number\n" unless(defined $step && $step>0);

    createChrHashTables($chr,$chrID,$cmap_file);        #hash tables: chromosome ID <=> chromosome name

    foreach my $k (1..$chr->{nb_chrs}){

        print LOG"-- $chr->{$k}->{name}\n";

        my $length=$chr->{$k}->{length};
        my $frag=0;

        for (my $start=0; $start<$length; $start+=$step){

            #a full window, or what is left up to the chromosome end
            my $end=($start<$length-$window)? $start+$window-1:$length-1;

            $frag++;
            $chr->{$k}->{$frag}=[$start,$end];           #creation of fragments, coordinates storage

            last if($end==$length-1);                    #the chromosome end is reached
        }

        #set in every case, including an empty chromosome or a step larger
        #than the window that jumps past the chromosome end
        $chr->{$k}->{nb_frag}=$frag;                     #nb of fragments per chromosome
    }
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
713 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
714 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
715 #Creation of chromosome hash tables from the cmap file
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# createChrHashTables($chr,$chrID,$cmap_file)
#
# Loads the cmap file into the two lookup tables used by the rest of the script.
#
# Arguments:
#   $chr       - hash ref, filled as $chr->{<id>}->{name} and
#                $chr->{<id>}->{length}; $chr->{nb_chrs} is reset to 0 and then
#                counts the records that were loaded
#   $chrID     - hash ref, filled as $chrID->{<name>} = <id>
#   $cmap_file - path of the cmap file; each record holds three
#                whitespace-separated columns: id, name, length
#
# Records with fewer than three columns (blank lines included) are ignored.
# Dies when the cmap file cannot be opened.
sub createChrHashTables{

    my ($chr,$chrID,$cmap_file)=@_;
    $chr->{nb_chrs}=0;

    #3-arg open on a lexical handle: the file name is taken literally and no
    #package-wide CMAP handle is clobbered
    open(my $cmap_fh,"<",$cmap_file) or die "$0: can't open ".$cmap_file.":$!\n";

    while(my $line=<$cmap_fh>){

        my ($k,$name,$length)=split(' ',$line);
        next unless defined $length;            #blank or truncated record

        $chr->{$k}->{name}=$name;
        $chr->{$k}->{length}=$length;
        $chrID->{$name}=$k;
        $chr->{nb_chrs}++;
    }

    close($cmap_fh);
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
734 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
735 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
736 #Read the mate file according to the input file format (solid, eland or sam)
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# readMateFile($chr1,$chr2,$pos1,$pos2,$order1,$order2,$t,$file_type,$tag_length)
#
# Decodes one record of a mate file into chromosome names, positions and end
# orders of the two reads.
#
# Arguments:
#   $chr1,$chr2     - scalar refs receiving the chromosome name of each read
#   $pos1,$pos2     - scalar refs receiving the position of each read; for the
#                     eland and sam formats a read on the reverse strand ("R")
#                     is stored as -(position+read length)
#   $order1,$order2 - scalar refs receiving the end order (1 or 2) of each read
#   $t              - array ref: the whitespace-split columns of the record
#   $file_type      - "solid", "eland" or "sam"
#   $tag_length     - hash ref {1=>length of read 1, 2=>length of read 2};
#                     only consulted for the sam format
#
# Returns 0 when the record has to be skipped (sam header line, sam record
# with "*" as a chromosome, blank or truncated record) and 1 otherwise.
# An unrecognised $file_type leaves the output references untouched and still
# returns 1.
sub readMateFile{

    my ($chr1,$chr2,$pos1,$pos2,$order1,$order2,$t,$file_type,$tag_length)=@_;

    if($file_type eq "solid"){

        return 0 if(@$t<10);                    #columns 6..9 are required

        ($$chr1,$$chr2,$$pos1,$$pos2,$$order1,$$order2)=($$t[6],$$t[7],$$t[8]+1,$$t[9]+1,1,2); #0-based
        return 1;
    }

    my ($strand1,$strand2);
    my ($tag_length1,$tag_length2);

    if($file_type eq "eland"){

        return 0 if(@$t<13);                    #columns 1,2 and 7..12 are required

        ($$chr1,$$chr2,$$pos1,$strand1,$$pos2,$strand2,$$order1,$$order2,$tag_length1,$tag_length2)=
            ($$t[11],$$t[12],$$t[7],$$t[8],$$t[9],$$t[10],1,2,length($$t[1]),length($$t[2])); #1-based

    }elsif($file_type eq "sam"){

        return 0 if(!@$t || $$t[0]=~/^@/);      #blank line or header sam filtered out
        return 0 if(@$t<8);                     #columns 1,2,3,6,7 are required

        ($$chr1,$$chr2,$$pos1,$$pos2)=($$t[2],$$t[6],$$t[3],$$t[7]);

        return 0 if ($$chr1 eq "*" || $$chr2 eq "*");

        $$chr2=$$chr1 if($$chr2 eq "=");        #"=" : same chromosome as the read

        my $flag=$$t[1];                        #sam bitwise flag
        $strand1 = ($flag&0x0010)? 'R':'F';
        $strand2 = ($flag&0x0020)? 'R':'F';

        $$order1 = ($flag&0x0040)? '1':'2';
        $$order2 = ($flag&0x0080)? '1':'2';
        $tag_length1 = $tag_length->{$$order1};
        $tag_length2 = $tag_length->{$$order2};
    }

    #get sequencing starts; the defined() guards only matter for an
    #unrecognised format, where no strand has been decoded
    $$pos1 = -($$pos1+$tag_length1) if (defined $strand1 && $strand1 eq "R");
    $$pos2 = -($$pos2+$tag_length2) if (defined $strand2 && $strand2 eq "R");

    return 1;
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
775 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
776 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
777 #Parsing of the mates files and creation of links between 2 chromosomal fragments
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# linking($chr,$chrID,$pair,$tag_length,$window_dist,$step,$mates_file,
#         $input_format,$sv_type,$links_file)
#
# Parses one mates file, records every accepted mate-pair in %$pair and writes
# the links between pairs of chromosomal fragments to $links_file.
#
# Arguments:
#   $chr          - hash ref of chromosomes: $chr->{<id>}->{name},
#                   $chr->{<id>}->{nb_frag} and $chr->{<id>}->{<frag>}=[start,end]
#   $chrID        - hash ref: chromosome name => chromosome id
#   $pair         - hash ref filled here: mate id => [chr id of read 1,
#                   chr id of read 2, first base of read 1, first base of
#                   read 2, end order of read 1, end order of read 2];
#                   a mate id already present is not processed again
#   $tag_length   - hash ref {1=>length of read 1, 2=>length of read 2}
#   $window_dist  - window size
#   $step         - step length
#   $mates_file   - input file; "*.gz" is read through gunzip, "*.bam" through
#                   samtools view, anything else is read directly
#   $input_format - format name handed to readMateFile and recupCoords
#   $sv_type      - "all", "inter" (reads on different chromosomes only) or
#                   "intra" (reads on the same chromosome only)
#   $links_file   - output file, one link per line, tab separated:
#                   chr1 name, fragment 1 start+1, fragment 1 end+1,
#                   chr2 name, fragment 2 start+1, fragment 2 end+1,
#                   number of mates, comma-separated mate list
#
# Progress is reported on the LOG handle. Dies when the input cannot be opened
# or the output cannot be written.
sub linking{

    my ($chr,$chrID,$pair,$tag_length,$window_dist,$step,$mates_file,$input_format,$sv_type,$links_file)=@_;
    my %link;

    my $record=0;
    my $nb_links=0;
    my $warn=10000;

    my @sfile=split(/\./,$mates_file);
    my $fchr=$sfile[$#sfile];                   #last dot-separated part of the file name

    print LOG "# $fchr : Linking procedure...\n";
    print LOG "-- file=$mates_file\n".
              "-- chromosome=$fchr\n".
              "-- input format=$input_format\n".
              "-- type=$sv_type\n".
              "-- read1 length=$tag_length->{1}, read2 length=$tag_length->{2}\n".
              "-- window size=$window_dist, step length=$step\n";

    #List-form pipe opens: the file name is handed to the program as a single
    #argument and never goes through a shell. The extension tests use an
    #escaped dot so that only real ".gz" / ".bam" suffixes match.
    my $fh;
    if ($mates_file =~ /\.gz$/) {
        open($fh,"-|","gunzip","-c",$mates_file) or die "$0: can't open ".$mates_file.":$!\n"; #gzcat
    }elsif($mates_file =~ /\.bam$/){
        open($fh,"-|","$SAMTOOLS_BIN_DIR/samtools","view",$mates_file) or die "$0: can't open ".$mates_file.":$!\n";#GALAXY
    }else{
        open($fh,"<",$mates_file) or die "$0: can't open ".$mates_file.":$!\n";
    }

    while(my $line=<$fh>){

        my @t=split(' ',$line);                 #for each mate-pair
        next unless @t;                         #blank line: no mate id

        my $mate=$t[0];
        my ($chr_read1, $chr_read2, $firstbase_read1, $firstbase_read2, $end_order_read1,$end_order_read2);

        next if(exists $$pair{$mate});

        next if (!readMateFile(\$chr_read1, \$chr_read2, \$firstbase_read1, \$firstbase_read2, \$end_order_read1, \$end_order_read2, \@t, $input_format,$tag_length));

        next unless (exists $chrID->{$chr_read1} && exists $chrID->{$chr_read2});

        ($chr_read1, $chr_read2)= ($chrID->{$chr_read1},$chrID->{$chr_read2});  #names => ids

        if($sv_type ne "all"){
            my $same_chr=($chr_read1 eq $chr_read2);
            next unless( ($sv_type eq "inter" && !$same_chr) ||
                         ($sv_type eq "intra" && $same_chr) );
        }

        $$pair{$mate}=[$chr_read1, $chr_read2, $firstbase_read1, $firstbase_read2, $end_order_read1, $end_order_read2 ]; #fill out the hash pair table (ready for the defineCoordsLinks function)

        $record++;

        my ($coord_start_read1,$coord_end_read1,$coord_start_read2,$coord_end_read2); #get the coordinates of each read

        recupCoords($firstbase_read1,\$coord_start_read1,\$coord_end_read1,$tag_length->{$end_order_read1},$input_format);
        recupCoords($firstbase_read2,\$coord_start_read2,\$coord_end_read2,$tag_length->{$end_order_read2},$input_format);

        #C-style loops are kept on purpose: getNextFrag (defined elsewhere)
        #hands back the index to continue from when the current fragment is
        #further than $window_dist from the read -- presumably to jump over
        #distant fragments; confirm against getNextFrag
        for(my $i=1;$i<=$chr->{$chr_read1}->{'nb_frag'};$i++){ #fast genome parsing for link creation

            if (abs($coord_start_read1-$chr->{$chr_read1}->{$i}->[0]) <= $window_dist){

                if(overlap($coord_start_read1,$coord_end_read1,$chr->{$chr_read1}->{$i}->[0],$chr->{$chr_read1}->{$i}->[1])){

                    for(my $j=1;$j<=$chr->{$chr_read2}->{'nb_frag'};$j++){

                        if (abs($coord_start_read2-$chr->{$chr_read2}->{$j}->[0]) <= $window_dist) {

                            if(overlap($coord_start_read2,$coord_end_read2,$chr->{$chr_read2}->{$j}->[0],$chr->{$chr_read2}->{$j}->[1])){

                                makeLink(\%link,$chr_read1,$i,$chr_read2,$j,$mate,\$nb_links); #make the link
                            }

                        }else{

                            $j=getNextFrag($coord_start_read2,$j,$chr->{$chr_read2}->{$j}->[0],$chr->{$chr_read2}->{nb_frag},$window_dist,$step);
                        }
                    }
                }

            }else{

                $i=getNextFrag($coord_start_read1,$i,$chr->{$chr_read1}->{$i}->[0],$chr->{$chr_read1}->{nb_frag},$window_dist,$step);
            }
        }

        if($record>=$warn){
            print LOG "-- $fchr : $warn mate-pairs analysed - $nb_links links done\n";
            $warn+=10000;
        }
    }
    close($fh);

    if(!$nb_links){
        print LOG "-- $fchr : No mate-pairs !\n".
                  "-- $fchr : No links have been found with the selected type of structural variations ($sv_type)\n";
    }

    print LOG "-- $fchr : Total : $record mate-pairs analysed - $nb_links links done\n";

    print LOG "-- $fchr : writing...\n";

    open(my $out,">",$links_file) or die "$0: can't write in the output ".$links_file." :$!\n";

    foreach my $chr1 ( sort { $a <=> $b} keys %link){ #Sorted links output

        foreach my $chr2 ( sort { $a <=> $b} keys %{$link{$chr1}}){

            foreach my $frag1 ( sort { $a <=> $b} keys %{$link{$chr1}{$chr2}}){

                foreach my $frag2 ( sort { $a <=> $b} keys %{$link{$chr1}{$chr2}{$frag1}}){

                    my $mates_list=$link{$chr1}{$chr2}{$frag1}{$frag2};
                    my @count=split(",",$mates_list);

                    print $out join("\t",
                        $chr->{$chr1}->{name},
                        $chr->{$chr1}->{$frag1}->[0]+1,
                        $chr->{$chr1}->{$frag1}->[1]+1,
                        $chr->{$chr2}->{name},
                        $chr->{$chr2}->{$frag2}->[0]+1,
                        $chr->{$chr2}->{$frag2}->[1]+1,
                        scalar(@count),         #nb of read
                        $mates_list             #mate list
                    )."\n";
                }
            }
        }
    }

    #buffered write errors only surface at close time
    close($out) or die "$0: can't write in the output ".$links_file." :$!\n";
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
911 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
912 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
913 #remove exact duplicate links according to the mate list
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# getUniqueLinks($links_file,$nrlinks_file)
#
# Collapses the links that share exactly the same mate list (column 8) into a
# single link and writes the result to $nrlinks_file.
#
# Arguments:
#   $links_file   - input links file, whitespace-separated columns; column 8
#                   (index 7) is the mate list, columns 2,3 and 5,6 (indexes
#                   1,2 and 4,5) are the start/end coordinates of both regions
#   $nrlinks_file - output file, tab separated, one line per distinct mate
#                   list, in order of first appearance
#
# For a repeated mate list the first occurrence is kept and its regions are
# narrowed to the largest start and the smallest end seen. Lines with fewer
# than 8 columns (blank lines included) carry no mate list and are ignored.
# Progress is reported on the LOG handle. Dies when the input cannot be opened
# or the output cannot be written.
sub getUniqueLinks{

    my ($links_file,$nrlinks_file)=@_;

    my %links;                                  #mate list => stored link (array ref)
    my @unique;                                 #stored links, in order of first appearance

    my $record=0;
    my $warn=300000;

    #log label: third dot-separated part from the end of the file name
    my @sfile=split(/\./,$links_file);
    my $fchr=$sfile[$#sfile-2];
    $fchr=$links_file unless defined $fchr;     #file name with too few parts

    print LOG "# $fchr : Getting unique links...\n";

    open(my $in,"<",$links_file) or die "$0: can't open $links_file :$!\n";

    while(my $line=<$in>){

        my @t=split(' ',$line);
        next if(@t<8);                          #no mate list on this line

        my $mates=$t[7];
        $record++;

        if(!exists $links{$mates}){             #Unique links selection

            $links{$mates}=[@t];
            push(@unique,$links{$mates});

        }else{                                  #get the shortest regions

            my $known=$links{$mates};

            for my $i (1,4){                    #maximum start
                $known->[$i]=$t[$i] if($t[$i]>$known->[$i]);
            }
            for my $i (2,5){                    #minimum end
                $known->[$i]=$t[$i] if($t[$i]<$known->[$i]);
            }
        }

        if($record>=$warn){
            print LOG "-- $fchr : $warn links analysed - ".scalar(@unique)." unique links done\n";
            $warn+=300000;
        }
    }
    close($in);

    my $nb_links=scalar(@unique);
    print LOG "-- $fchr : Total : $record links analysed - $nb_links unique links done\n";

    open(my $out,">",$nrlinks_file) or die "$0: can't write in the output: $nrlinks_file :$!\n";
    print LOG "-- $fchr : writing...\n";

    foreach my $stored (@unique){
        print $out join("\t",@$stored)."\n";    #all links output
    }

    #buffered write errors only surface at close time
    close($out) or die "$0: can't write in the output: $nrlinks_file :$!\n";
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
980 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
981 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
982 #get the new coordinates of each link from the mate list
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
983 sub defineCoordsLinks{
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
984
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
985 my ($chr,$chrID,$pair,$input_format,$sv_type,$tag_length,$links_file,$clinks_file)=@_;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
986
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
987 my @sfile=split(/\./,$links_file);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
988 my $fchr=$sfile[$#sfile-2];
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
989
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
990 my $fh = new FileHandle;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
991 my $fh2 = new FileHandle;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
992
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
993 $fh->open("<$links_file") or die "$0: can't open $links_file :$!\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
994 $fh2->open(">$clinks_file") or die "$0: can't write in the output: $clinks_file :$!\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
995
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
996 print LOG "# $fchr : Defining precise link coordinates...\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
997
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
998 my $record=0;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
999 my $warn=100000;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1000
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1001 my %coords;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1002 my %strands;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1003 my %order;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1004 my %ends_order;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1005
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1006 while(<$fh>){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1007
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1008
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1009 my ($col1,$col2)=(1,2); #for an intrachromosomal link
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1010 my $diffchr=0; #difference between chr1 and chr2
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1011 my ($chr1,$chr2,$mates_list,$npairs)=(split)[0,3,7,8];
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1012 ($chr1,$chr2) = ($chrID->{$chr1},$chrID->{$chr2});
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1013 if ($chr1 != $chr2){ #for an interchromosomal link
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1014 $col1=$col2=0; #no distinction
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1015 $diffchr=1;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1016 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1017
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1018 my @pairs=split(",",$mates_list);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1019
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1020 $coords{$col1}{$chr1}->{start}=undef;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1021 $coords{$col1}{$chr1}->{end}=undef;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1022 $coords{$col2}{$chr2}->{start}=undef;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1023 $coords{$col2}{$chr2}->{end}=undef;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1024 $strands{$col1}{$chr1}=undef;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1025 $strands{$col2}{$chr2}=undef;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1026 $ends_order{$col1}{$chr1}=undef;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1027 $ends_order{$col2}{$chr2}=undef;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1028
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1029
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1030 $order{$col1}{$chr1}->{index}->{1}=undef;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1031 $order{$col1}{$chr1}->{index}->{2}=undef;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1032 $order{$col2}{$chr2}->{index}->{1}=undef;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1033 $order{$col2}{$chr2}->{index}->{2}=undef;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1034 $order{$col1}{$chr1}->{order}=undef;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1035 $order{$col2}{$chr2}->{order}=undef;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1036
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1037 $record++;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1038
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1039 for my $p (0..$#pairs){ #for each pair
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1040
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1041 my ($coord_start_read1,$coord_end_read1);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1042 my ($coord_start_read2,$coord_end_read2);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1043 my $strand_read1=recupCoords(${$$pair{$pairs[$p]}}[2],\$coord_start_read1,\$coord_end_read1,$tag_length->{${$$pair{$pairs[$p]}}[4]},$input_format);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1044 my $strand_read2=recupCoords(${$$pair{$pairs[$p]}}[3],\$coord_start_read2,\$coord_end_read2,$tag_length->{${$$pair{$pairs[$p]}}[5]},$input_format);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1045
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1046 if(!$diffchr){ #for a intrachromosomal link
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1047 if($coord_start_read2<$coord_start_read1){ #get the closer start coordinate for each column
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1048 ($col1,$col2)=(2,1);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1049 }else{
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1050 ($col1,$col2)=(1,2);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1051 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1052 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1053
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1054 push(@{$coords{$col1}{${$$pair{$pairs[$p]}}[0]}->{start}},$coord_start_read1); #get coords and strands of f3 and r3 reads
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1055 push(@{$coords{$col1}{${$$pair{$pairs[$p]}}[0]}->{end}},$coord_end_read1);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1056 push(@{$coords{$col2}{${$$pair{$pairs[$p]}}[1]}->{start}},$coord_start_read2);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1057 push(@{$coords{$col2}{${$$pair{$pairs[$p]}}[1]}->{end}},$coord_end_read2);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1058 push(@{$strands{$col1}{${$$pair{$pairs[$p]}}[0]}},$strand_read1);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1059 push(@{$strands{$col2}{${$$pair{$pairs[$p]}}[1]}},$strand_read2);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1060 push(@{$ends_order{$col1}{${$$pair{$pairs[$p]}}[0]}},${$$pair{$pairs[$p]}}[4]);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1061 push(@{$ends_order{$col2}{${$$pair{$pairs[$p]}}[1]}},${$$pair{$pairs[$p]}}[5]);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1062 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1063
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1064 ($col1,$col2)=(1,2) if(!$diffchr);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1065
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1066 my $coord_start_chr1=min(min(@{$coords{$col1}{$chr1}->{start}}),min(@{$coords{$col1}{$chr1}->{end}})); #get the biggest region
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1067 my $coord_end_chr1=max(max(@{$coords{$col1}{$chr1}->{start}}),max(@{$coords{$col1}{$chr1}->{end}}));
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1068 my $coord_start_chr2=min(min(@{$coords{$col2}{$chr2}->{start}}),min(@{$coords{$col2}{$chr2}->{end}}));
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1069 my $coord_end_chr2=max(max(@{$coords{$col2}{$chr2}->{start}}),max(@{$coords{$col2}{$chr2}->{end}}));
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1070
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1071 @{$order{$col1}{$chr1}->{index}->{1}}= sort {${$coords{$col1}{$chr1}->{start}}[$a] <=> ${$coords{$col1}{$chr1}->{start}}[$b]} 0 .. $#{$coords{$col1}{$chr1}->{start}};
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1072 @{$order{$col2}{$chr2}->{index}->{1}}= sort {${$coords{$col2}{$chr2}->{start}}[$a] <=> ${$coords{$col2}{$chr2}->{start}}[$b]} 0 .. $#{$coords{$col2}{$chr2}->{start}};
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1073
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1074 foreach my $i (@{$order{$col1}{$chr1}->{index}->{1}}){ #get the rank of the chr2 reads according to the sorted chr1 reads (start coordinate sorting)
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1075 foreach my $j (@{$order{$col2}{$chr2}->{index}->{1}}){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1076
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1077 if(${$order{$col1}{$chr1}->{index}->{1}}[$i] == ${$order{$col2}{$chr2}->{index}->{1}}[$j]){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1078 ${$order{$col1}{$chr1}->{index}->{2}}[$i]=$i;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1079 ${$order{$col2}{$chr2}->{index}->{2}}[$i]=$j;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1080 last;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1081 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1082 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1083 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1084
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1085 foreach my $i (@{$order{$col1}{$chr1}->{index}->{2}}){ #use rank chr1 as an ID
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1086 foreach my $j (@{$order{$col2}{$chr2}->{index}->{2}}){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1087
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1088 if(${$order{$col1}{$chr1}->{index}->{2}}[$i] == ${$order{$col2}{$chr2}->{index}->{2}}[$j]){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1089 ${$order{$col1}{$chr1}->{order}}[$i]=$i+1;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1090 ${$order{$col2}{$chr2}->{order}}[$i]=$j+1;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1091 last;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1092 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1093 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1094 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1095
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1096 @pairs=sortTablebyIndex(\@{$order{$col1}{$chr1}->{index}->{1}},\@pairs);#sorting of the pairs, strands, and start coords from the sorted chr2 reads
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1097 @{$strands{$col1}{$chr1}}=sortTablebyIndex(\@{$order{$col1}{$chr1}->{index}->{1}},$strands{$col1}{$chr1});
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1098 @{$strands{$col2}{$chr2}}=sortTablebyIndex(\@{$order{$col1}{$chr1}->{index}->{1}},$strands{$col2}{$chr2});
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1099 @{$ends_order{$col1}{$chr1}}=sortTablebyIndex(\@{$order{$col1}{$chr1}->{index}->{1}},$ends_order{$col1}{$chr1});
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1100 @{$ends_order{$col2}{$chr2}}=sortTablebyIndex(\@{$order{$col1}{$chr1}->{index}->{1}},$ends_order{$col2}{$chr2});
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1101 @{$coords{$col1}{$chr1}->{start}}=sortTablebyIndex(\@{$order{$col1}{$chr1}->{index}->{1}},$coords{$col1}{$chr1}->{start});
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1102 @{$coords{$col2}{$chr2}->{start}}=sortTablebyIndex(\@{$order{$col1}{$chr1}->{index}->{1}},$coords{$col2}{$chr2}->{start});
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1103
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1104
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1105 my @link=($chr->{$chr1}->{name}, $coord_start_chr1 , $coord_end_chr1, #all information output
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1106 $chr->{$chr2}->{name}, $coord_start_chr2 , $coord_end_chr2,
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1107 scalar @pairs,
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1108 join(",",@pairs),
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1109 join(",",@{$strands{$col1}{$chr1}}),
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1110 join(",",@{$strands{$col2}{$chr2}}),
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1111 join(",",@{$ends_order{$col1}{$chr1}}),
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1112 join(",",@{$ends_order{$col2}{$chr2}}),
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1113 join(",",@{$order{$col1}{$chr1}->{order}}),
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1114 join(",",@{$order{$col2}{$chr2}->{order}}),
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1115 join(",",@{$coords{$col1}{$chr1}->{start}}),
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1116 join(",",@{$coords{$col2}{$chr2}->{start}}));
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1117
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1118 print $fh2 join("\t",@link)."\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1119
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1120 if($record>=$warn){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1121 print LOG "-- $fchr : $warn links processed\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1122 $warn+=100000;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1123 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1124 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1125 $fh->close;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1126 $fh2->close;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1127
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1128 print LOG "-- $fchr : Total : $record links processed\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1129
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1130 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1131 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1132 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
#Sort links according to the concerned chromosomes and their coordinates
#
# Arguments:
#   $links_file       - path of the tab-separated links file to sort
#   $sortedlinks_file - path of the file receiving the sorted links
#   $unique           - when true, the sorted stream is piped through "sort -u"
#
# The work is done by the external "sort" program, run through the shell.
# Sort keys: column 1, then column 4 (both as text), then columns 2, 5 and 8
# (numerically). Dies if the command does not exit with status 0.
sub sortLinks{

    my ($links_file,$sortedlinks_file,$unique)=@_;

    my @sfile=split(/\./,$links_file);
    my $fchr=$sfile[$#sfile-2];                     #third dot-separated field from the end of the file name, only used as a log prefix

    print LOG "# $fchr : Sorting links...\n";

    #Wrap a path in single quotes for the shell (an embedded ' becomes '\''),
    #so that spaces or shell metacharacters in a file name are taken literally
    my $shell_quote=sub{
        my ($path)=@_;
        $path=~s/'/'\\''/g;
        return "'".$path."'";
    };

    #NOTE(review): "sort -u" without keys compares whole lines, so when $unique
    #is set the final order is the whole-line order of that second sort rather
    #than the keyed order of the first one - confirm this is what callers expect
    my $pipe=($unique)? "| sort -u":"";

    my $cmd="sort -k 1,1 -k 4,4 -k 2,2n -k 5,5n -k 8,8n ".$shell_quote->($links_file).
            " $pipe > ".$shell_quote->($sortedlinks_file);

    #a failed sort would otherwise leave an empty or truncated output unnoticed
    system($cmd)==0 or die "$0: can't sort $links_file into $sortedlinks_file (exit status: $?)\n";

}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1148 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1149 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
#removal of fully overlapped links
#
# Arguments:
#   $links_file   - path of the tab-separated links file to read
#   $nrlinks_file - path of the file receiving the links that were kept
#   $warn_out     - when true, the creation of the output file is logged
#
# Every input line is loaded in memory, indexed by its 1-based line number.
# Each link still present (link1) is compared with the following links (link2)
# for as long as both have the same two chromosome names (columns 1 and 4) and
# overlap() accepts their first coordinate pairs (columns 2-3). When overlap()
# also accepts the second coordinate pairs (columns 5-6), the pair lists
# (column 8, filtered by deleteBadOrderSensePairs()) are compared:
#   - all pairs of link1 found in link2 -> link1 is dropped
#   - all pairs of link2 found in link1 -> link2 is dropped
# Links that are not dropped are written, unchanged, to $nrlinks_file.
#
# NOTE(review): overlap() and deleteBadOrderSensePairs() are defined elsewhere
# in this file; presumably the former tests whether two intervals intersect
# and the latter discards some pair identifiers - confirm against their code.
# NOTE(review): the scan stops at the first following link that fails the
# chromosome/first-coordinate test, which presumably relies on the input being
# sorted beforehand - confirm against the caller.
sub removeFullyOverlappedLinks{

    my ($links_file,$nrlinks_file,$warn_out)=@_;

    my %pt;                                         #input links: line number => array ref of columns; a deleted key is a dropped link
    my $n=1;

    my @sfile=split(/\./,$links_file);
    my $fchr=$sfile[$#sfile-2];                     #third dot-separated field from the end of the file name, only used as a log prefix

    my $fh = FileHandle->new;

    #mode given separately so that the file name is never parsed for mode characters
    $fh->open($links_file,"r") or die "$0: can't open $links_file :$!\n";
    while(<$fh>){

        my @t=split("\t",$_);                       #no chomp: the last column keeps its newline, reused as is on output
        $pt{$n}=[@t];
        $n++;
    }
    $fh->close;

    my $nb_links=$n-1;
    my $nb=$nb_links;

    my %pt2;                                        #kept links: output rank => array ref of columns
    my $nb2=1;                                      #next free output rank
    my $record=0;
    my $warn=10000;

    print LOG "# $fchr : Removing fully overlapped links...\n";

    #index of the next link still present after $ind, or a value > $nb if there is none
    my $next_found=sub{
        my ($ind)=@_;
        $ind++;
        $ind++ while (!exists $pt{$ind}&& $ind<=$nb);
        return $ind;
    };

    LINK:

    for my $ind1 (1..$nb){

        $record++;
        if($record>=$warn){
            print LOG "-- $fchr : $warn unique links analysed - ".($nb2-1)." non-overlapped links done\n";
            $warn+=10000;
        }

        next LINK if(!exists $pt{$ind1});           #already dropped as link2 of a previous link

        my @link=@{$pt{$ind1}};                     #link1
        my ($chr1,$start1,$end1,$chr2,$start2,$end2)=@link[0..5];               #get info of link1
        my @mates=deleteBadOrderSensePairs(split(",",$link[7]));
        my %mates=map{$_ =>1} @mates;               #lookup table of the link1 pairs, the same for every link2

        my $ind2=$next_found->($ind1);              #get the next found link

        if($ind2<=$nb){

            my @next_link=@{$pt{$ind2}};            #link2
            my ($chr3,$start3,$end3,$chr4,$start4,$end4)=@next_link[0..5];      #get info of link2
            my @next_mates=deleteBadOrderSensePairs(split(",",$next_link[7]));

            while(($chr1 eq $chr3 && $chr2 eq $chr4) && overlap($start1,$end1,$start3,$end3)){  #loop here according to the chr1 coordinates, need an overlap between links to enter

                if(overlap($start2,$end2,$start4,$end4)){                       #pairs are only compared when the chr2 coordinates overlap too

                    my $nb_mates=scalar grep( $mates{$_}, @next_mates );        #number of link2 pairs also present in link1

                    if($nb_mates == scalar @mates){

                        delete $pt{$ind1};          #if pairs of link 1 are all included in link 2 -> delete link1
                        next LINK;                  #go to link2, link2 becomes link1
                    }

                    delete $pt{$ind2} if($nb_mates == scalar @next_mates);      #if pairs of link2 are all included in link 1 -> delete link2
                }

                $ind2=$next_found->($ind2);         #we continue by checking the next link2

                if($ind2>$nb){                      #if no more link in the file -> save link1

                    $pt2{$nb2}=\@link;
                    $nb2++;
                    next LINK;
                }

                @next_link=@{$pt{$ind2}};           #get info of link2
                ($chr3,$start3,$end3,$chr4,$start4,$end4)=@next_link[0..5];
                @next_mates=deleteBadOrderSensePairs(split(",",$next_link[7]));
            }
        }
        $pt2{$nb2}=\@link;                          #if no (more) link with chr1 coordinates overlap -> save link1
        $nb2++;
    }

    print LOG "-- $fchr : Total : $nb_links unique links analysed - ".($nb2-1)." non-overlapped links done\n";

    #OUTPUT

    $fh = FileHandle->new;
    $fh->open($nrlinks_file,"w") or die "$0: can't write in the output: $nrlinks_file :$!\n";
    print LOG "-- $fchr : writing...\n";
    for my $i (1..$nb2-1){

        print $fh join("\t",@{$pt2{$i}});           #all links output
    }

    #buffered write errors (e.g. disk full) only show up when the handle is closed
    $fh->close or die "$0: can't close the output: $nrlinks_file :$!\n";

    print LOG "-- $fchr : output created: $nrlinks_file\n" if($warn_out);

    undef %pt;
    undef %pt2;
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1285 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Keep only the links whose final score reaches a threshold.
#
# Arguments:
#   $links_file       - path of the tab-separated links file to read
#   $pflinks_file     - path of the file receiving the links that were kept
#   $finalScore_thres - minimal score (numeric comparison, inclusive)
#
# The score of a link is read from the second-to-last column of its line.
# Kept lines are written back with their columns re-joined by tabs. The
# numbers of links read and kept are reported on the LOG handle.
sub postFiltering {

    my ($links_file,$pflinks_file, $finalScore_thres)=@_;

    my @sfile=split(/\./,$links_file);
    my $fchr=$sfile[$#sfile-2];                     #third dot-separated field from the end of the file name, only used as a log prefix

    my ($nb,$nb2)=(0,0);                            #links read, links kept

    print LOG "# $fchr : Post-filtering links...\n";
    print LOG "-- $fchr : final score threshold = $finalScore_thres\n";

    my $fh = FileHandle->new;
    my $fh2 = FileHandle->new;

    #mode given separately so that the file names are never parsed for mode characters
    $fh->open($links_file,"r") or die "$0: can't open $links_file :$!\n";
    $fh2->open($pflinks_file,"w") or die "$0: can't write in the output: $pflinks_file :$!\n";

    while(my $line=<$fh>){

        my @t=split("\t",$line);                    #no chomp: the last column keeps its newline
        my $score=$t[$#t-1];

        if($score >= $finalScore_thres){
            print $fh2 join("\t", @t);
            $nb2++;
        }
        $nb++;
    }
    $fh->close;

    #buffered write errors (e.g. disk full) only show up when the handle is closed
    $fh2->close or die "$0: can't close the output: $pflinks_file :$!\n";

    print LOG "-- $fchr : Total : $nb unique links analysed - $nb2 links kept\n";
    print LOG "-- $fchr : output created: $pflinks_file\n";
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1323
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1324
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1325
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1326 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1327 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1328 #Filtering of the links
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1329 sub strandFiltering{
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1330
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1331 my($chr,$chrID,$pairs_threshold,$strand_filtering,$chromosomes,
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1332 $input_format,$cmap_file,$mate_sense, $tag_length,$links_file,$flinks_file)=@_;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1333
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1334 my @sfile=split(/\./,$links_file);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1335 my $fchr=$sfile[$#sfile-1];
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1336
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1337
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1338 my %chrs;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1339 my %chrs1;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1340 my %chrs2;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1341 my $nb_chrs;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1342 my $exclude;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1343
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1344 if($chromosomes){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1345 my @chrs=split(",",$chromosomes);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1346 $nb_chrs=scalar @chrs;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1347 $exclude=($chrs[0]=~/^\-/)? 1:0;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1348 for my $chrName (@chrs){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1349 $chrName=~s/^(\-)//;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1350 my $col=($chrName=~s/_(1|2)$//);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1351
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1352 if(!$col){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1353 $chrs{$chrID->{$chrName}}=undef
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1354 }else{
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1355 $chrs1{$chrID->{$chrName}}=undef if($1==1);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1356 $chrs2{$chrID->{$chrName}}=undef if($1==2);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1357 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1358 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1359 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1360
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1361 my $record=0;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1362 my $nb_links=0;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1363 my $warn=10000;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1364
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1365 my $sens_ratio_threshold=0.6;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1366
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1367 print LOG "\# Filtering procedure...\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1368 print LOG "\# Number of pairs and strand filtering...\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1369 print LOG "-- file=$links_file\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1370 print LOG "-- nb_pairs_threshold=$pairs_threshold, strand_filtering=".(($strand_filtering)? "yes":"no").
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1371 ", chromosomes=".(($chromosomes)? "$chromosomes":"all")."\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1372
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1373
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1374
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1375 my $fh = new FileHandle;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1376 my $fh2 = new FileHandle;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1377
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1378 $fh->open("<$links_file") or die "$0: can't open $links_file :$!\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1379 $fh2->open(">$flinks_file") or die "$0: can't write in the output: $flinks_file :$!\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1380
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1381 while(<$fh>){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1382
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1383 my @t=split; #for each link
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1384 my $is_good=1;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1385 $record++;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1386
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1387
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1388 if($chromosomes){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1389
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1390 my ($chr1,$chr2)=($chrID->{$t[0]},$chrID->{$t[3]});
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1391
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1392 if(!$exclude){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1393 $is_good=(exists $chrs{$chr1} && exists $chrs{$chr2})? 1:0;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1394 $is_good=(exists $chrs1{$chr1} && exists $chrs2{$chr2})? 1:0 if(!$is_good);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1395 $is_good=($nb_chrs==1 && (exists $chrs1{$chr1} || exists $chrs2{$chr2}))? 1:0 if(!$is_good);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1396 }else{
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1397 $is_good=(exists $chrs{$chr1} || exists $chrs{$chr2})? 0:1;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1398 $is_good=(exists $chrs1{$chr1} || exists $chrs2{$chr2})? 0:1 if($is_good);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1399 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1400 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1401
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1402 $is_good = ($is_good && $t[6] >= $pairs_threshold)? 1 :0; #filtering according the number of pairs
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1403 if($is_good && $strand_filtering){ #if filtering according the strand sense
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1404
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1405 my @mates=split(/,/,$t[7]); #get the concordant pairs in the strand sense
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1406 my @strands1=split(/,/,$t[8]);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1407 my @strands2=split(/,/,$t[9]);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1408
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1409 my %mate_class=( 'FF' => 0, 'RR' => 0, 'FR' => 0, 'RF' => 0);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1410
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1411 my %mate_reverse=( 'FF' => 'RR', 'RR' => 'FF', #group1: FF,RR
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1412 'FR' => 'RF', 'RF' => 'FR'); #group2: FR,RF
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1413
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1414 my %mate_class2=( $mate_sense=>"NORMAL_SENSE", inverseSense($mate_sense)=>"NORMAL_SENSE",
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1415 substr($mate_sense,0,1).inverseSense(substr($mate_sense,1,1))=>"REVERSE_SENSE",
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1416 inverseSense(substr($mate_sense,0,1)).substr($mate_sense,1,1)=>"REVERSE_SENSE");
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1417
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1418 if($t[6] == 1){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1419
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1420 push(@t,$mate_class2{$strands1[0].$strands2[0]},"1/1",1,1);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1421
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1422 }else{
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1423
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1424 tie (my %class,'Tie::IxHash');
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1425 my $split;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1426
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1427 foreach my $i (0..$#mates){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1428 $mate_class{$strands1[$i].$strands2[$i]}++; #get the over-represented group
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1429 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1430
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1431 my $nb_same_sens_class=$mate_class{FF}+$mate_class{RR};
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1432 my $nb_diff_sens_class=$mate_class{FR}+$mate_class{RF};
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1433 my $sens_ratio=max($nb_same_sens_class,$nb_diff_sens_class)/($nb_same_sens_class+$nb_diff_sens_class);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1434
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1435 if($sens_ratio < $sens_ratio_threshold){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1436 %class=(1=>'FF', 2=>'FR');
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1437 $split=1;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1438 }else{
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1439 $class{1}=($nb_same_sens_class > $nb_diff_sens_class)? 'FF':'FR'; #if yes get the concerned class
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1440 $split=0;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1441 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1442
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1443 $is_good=getConsistentSenseLinks(\@t,\@mates,\@strands1,\@strands2,$tag_length,$mate_sense,\%mate_reverse,\%mate_class2,\%class,$split,$pairs_threshold);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1444 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1445 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1446
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1447 if($is_good){ #PRINT
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1448
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1449 my $nb=scalar @t;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1450 if($nb > 20){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1451 my @t2=splice(@t,0,20);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1452 print $fh2 join("\t",@t2)."\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1453 $nb_links++;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1454 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1455 $nb_links++;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1456 print $fh2 join("\t",@t)."\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1457 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1458
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1459 if($record>=$warn){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1460 print LOG "-- $fchr : $warn links analysed - $nb_links links kept\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1461 $warn+=10000;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1462 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1463 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1464 $fh->close;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1465 $fh2->close;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1466
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1467 print LOG "-- $fchr : No links have been found with the selected filtering parameters\n" if(!$nb_links);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1468
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1469 print LOG "-- $fchr : Total : $record links analysed - $nb_links links kept\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1470
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1471
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1472 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1473 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1474 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Rebuild a link record so that it only contains the pairs whose strand
# orientation belongs to the requested sense class(es).
#
# Arguments:
#   $t            array ref holding the fields of one link. Fields 10..15 are
#                 read as comma-separated lists (ends_order1, ends_order2,
#                 order1, order2, positions1, positions2); fields 0 and 3 are
#                 copied through unchanged. Overwritten in place on success.
#   $mates        array ref of pair identifiers
#   $strands1     array ref of strands for the first end, parallel to $mates
#   $strands2     array ref of strands for the second end, parallel to $mates
#   $tag_length   forwarded to getEnds()
#   $mate_sense   accepted for interface compatibility; not read here
#   $mate_reverse hash ref giving, for a sense class, the other orientation
#                 that counts as the same class
#   $mate_class2  hash ref giving the label stored in field 16, looked up with
#                 the strands of the first kept pair
#   $class        hash ref whose values are the sense classes to keep; one
#                 20-field record is produced per class that passes $thres
#   $split        accepted for interface compatibility; not read here
#   $thres        minimal number of kept pairs for a class to be reported
#
# Returns 1 (and replaces @$t by the new record(s), 20 fields each,
# concatenated) when at least one class passed the threshold, 0 otherwise.
sub getConsistentSenseLinks {

    my ($t, $mates, $strands1, $strands2, $tag_length, $mate_sense,
        $mate_reverse, $mate_class2, $class, $split, $thres) = @_;

    my $total_pairs = scalar @{$mates};

    # Per-pair lists stored as comma-separated strings in the link record
    my @all_ends_order1 = split(/,/, $t->[10]);
    my @all_ends_order2 = split(/,/, $t->[11]);
    my @all_order1      = split(/,/, $t->[12]);
    my @all_order2      = split(/,/, $t->[13]);
    my @all_positions1  = split(/,/, $t->[14]);
    my @all_positions2  = split(/,/, $t->[15]);

    my @rebuilt;

    for my $class_id (keys %{$class}) {

        my $wanted_sense = $class->{$class_id};

        tie(my %kept, 'Tie::IxHash');
        my @dropped_orders;

        # Sort every pair into "kept" (same class as $wanted_sense) or "dropped"
        for my $idx (0 .. $#{$mates}) {

            my $pair_sense = $strands1->[$idx] . $strands2->[$idx];

            unless ($pair_sense eq $wanted_sense
                    || $pair_sense eq $mate_reverse->{$wanted_sense}) {
                push @dropped_orders, $all_order1[$idx];
                next;
            }

            push @{ $kept{mates} },       $mates->[$idx];
            push @{ $kept{strands1} },    $strands1->[$idx];
            push @{ $kept{strands2} },    $strands2->[$idx];
            push @{ $kept{ends_order1} }, $all_ends_order1[$idx];
            push @{ $kept{ends_order2} }, $all_ends_order2[$idx];
            push @{ $kept{positions1} },  $all_positions1[$idx];
            push @{ $kept{positions2} },  $all_positions2[$idx];
        }

        # Orders are renumbered only when something was dropped; otherwise
        # the original lists are reused as they are.
        if (@dropped_orders) {
            $kept{order1} = [];
            $kept{order2} = [];
            getNewOrders(\@all_order1, \@all_order2, \@dropped_orders,
                         $kept{order1}, $kept{order2});
        }
        else {
            $kept{order1} = [@all_order1];
            $kept{order2} = [@all_order2];
        }

        my @ends1;
        getEnds(\@ends1, $kept{positions1}, $kept{strands1}, $kept{ends_order1}, $tag_length);
        my @ends2;
        getEnds(\@ends2, $kept{positions2}, $kept{strands2}, $kept{ends_order2}, $tag_length);

        my $kept_pairs = scalar @{ $kept{mates} };
        my $ratio_text = $kept_pairs . "/" . $total_pairs;
        my $ratio      = $kept_pairs / $total_pairs;

        next if $kept_pairs < $thres;    #filtering according the number of pairs

        my $first_sense = $kept{strands1}->[0] . $kept{strands2}->[0];

        push @rebuilt,
            $t->[0],
            min(min(@{ $kept{positions1} }), min(@ends1)),
            max(max(@{ $kept{positions1} }), max(@ends1)),
            $t->[3],
            min(min(@{ $kept{positions2} }), min(@ends2)),
            max(max(@{ $kept{positions2} }), max(@ends2)),
            $kept_pairs,
            join(",", @{ $kept{mates} }),
            join(",", @{ $kept{strands1} }),
            join(",", @{ $kept{strands2} }),
            join(",", @{ $kept{ends_order1} }),
            join(",", @{ $kept{ends_order2} }),
            join(",", @{ $kept{order1} }),
            join(",", @{ $kept{order2} }),
            join(",", @{ $kept{positions1} }),
            join(",", @{ $kept{positions2} }),
            $mate_class2->{$first_sense},
            $ratio_text,
            $ratio,
            $total_pairs;
    }

    # Nothing usable was produced: leave the caller's record untouched
    return 0 unless grep { defined($_) } @rebuilt;

    @{$t} = @rebuilt;
    return 1;
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1561 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1562 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Renumber the order lists of a link after some pairs have been removed.
#
# Arguments:
#   $tab1     array ref of the original first-end orders (accepted for
#             interface compatibility; not read here)
#   $tab2     array ref of the original second-end orders
#   $list     array ref of the order values that have been removed
#   $newtab1  array ref receiving the new first-end orders: 1, 2, 3, ...
#             one entry per surviving element of $tab2
#   $newtab2  array ref receiving the new second-end orders: each surviving
#             value of $tab2 decreased by the number of removed values that
#             are smaller than it
#
# Values of $tab2 that are numerically equal to a removed value are skipped.
# Results are appended to $newtab1 / $newtab2; nothing is returned.
sub getNewOrders {

    my ($tab1, $tab2, $list, $newtab1, $newtab2) = @_;

    my $rank = 0;

    ORDER:
    foreach my $order (@{$tab2}) {

        my $smaller_removed = 0;

        foreach my $removed (@{$list}) {
            next ORDER if $removed == $order;    # this order was removed itself
            $smaller_removed++ if $removed < $order;
        }

        push @{$newtab2}, $order - $smaller_removed;
        push @{$newtab1}, ++$rank;
    }
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1583
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1584 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1585 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1586 #Filtering of the links using their order
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1587 sub orderFiltering {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1588
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1589 my ($chr,$chrID,$nb_pairs_threshold,$nb_pairs_order_threshold,$mu,$sigma,$mate_sense,$tag_length,$links_file,$flinks_file)=@_;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1590
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1591 my @sfile=split(/\./,$links_file);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1592 my $fchr=$sfile[$#sfile-2];
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1593
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1594
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1595 my $diff_sense_ends=(($mate_sense eq "FR") || ($mate_sense eq "RF"))? 1:0;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1596
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1597 my $record=0;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1598 my $warn=10000;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1599 my $nb_links=0;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1600
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1601 my $quant05 = 1.644854;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1602 my $quant001 = 3.090232;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1603 my $alphaDist = $quant05 * 2 * $sigma;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1604 my $maxFragmentLength = &floor($quant001 * $sigma + $mu);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1605
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1606 print LOG "\# Filtering by order...\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1607 print LOG "-- mu length=$mu, sigma length=$sigma, nb pairs order threshold=$nb_pairs_order_threshold\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1608 print LOG "-- distance between comparable pairs was set to $alphaDist\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1609 print LOG "-- maximal fragment length was set to $maxFragmentLength\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1610
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1611
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1612 my $fh = new FileHandle;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1613 my $fh2 = new FileHandle;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1614
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1615 $fh->open("<$links_file") or die "$0: can't open $links_file :$!\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1616 $fh2->open(">$flinks_file") or die "$0: can't write in the output: $flinks_file :$!\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1617
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1618 while(<$fh>){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1619
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1620 $record++;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1621 my @t = split;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1622 my ($chr1,$chr2,$mates_list)=@t[0,3,7];
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1623 my @pairs=split(",",$mates_list);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1624 ($chr1,$chr2) = ($chrID->{$chr1},$chrID->{$chr2});
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1625 my ($coord_start_chr1,$coord_end_chr1,$coord_start_chr2,$coord_end_chr2) = @t[1,2,4,5];
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1626 my $numberOfPairs = $t[6];
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1627 my @strand1 = split (/,/,$t[8]);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1628 my @strand2 = split (/,/,$t[9]);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1629 my @ends_order1 = split (/,/,$t[10]);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1630 my @ends_order2 = split (/,/,$t[11]);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1631 my @order1 = split (/,/,$t[12]);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1632 my @order2 = split (/,/,$t[13]);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1633 my @positions1 = split (/,/,$t[14]);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1634 my @positions2 = split (/,/,$t[15]);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1635 my @ends1; getEnds(\@ends1,\@positions1,\@strand1,\@ends_order1,$tag_length);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1636 my @ends2; getEnds(\@ends2,\@positions2,\@strand2,\@ends_order2,$tag_length);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1637 my $clusterCoordinates_chr1;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1638 my $clusterCoordinates_chr2;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1639 my $reads_left = 0;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1640
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1641 my $ifRenv = $t[16];
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1642 my $strand_ratio_filtering=$t[17];
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1643
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1644 #kind of strand filtering. For example, will keep only FFF-RRR from a link FFRF-RRRF if <F-R> orientation is correct
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1645 my ($singleBreakpoint, %badInFRSense) = findBadInFRSenseSOLiDSolexa(\@strand1,\@strand2,\@ends_order1,\@ends_order2,\@order1,\@order2,$mate_sense);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1646 #find pairs type F-RRRR or FFFF-R in the case if <R-F> orientation is correct
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1647 #These pairs are annotated as BED pairs forever! They won't be recycled!
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1648 my $table;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1649 for my $i (0..$numberOfPairs-1) { #fill the table with non adequate pairs: pairID numberOfNonAdPairs nonAdPairIDs
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1650 my $nonAdeq = 0;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1651 for my $j (0..$i-1) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1652 if (exists($table->{$j}->{$i})) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1653 $nonAdeq++;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1654 $table->{$i}->{$j} = 1;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1655 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1656 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1657 for my $j ($i+1..$numberOfPairs-1) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1658 if ($positions1[$j]-$positions1[$i]>$alphaDist) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1659 if (&reversed ($i,$j,$ifRenv,\@positions2)) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1660 $nonAdeq++;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1661 $table->{$i}->{$j} = 1;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1662 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1663 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1664 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1665 $table->{$i}->{nonAdeq} = $nonAdeq;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1666 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1667
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1668 for my $bad (keys %badInFRSense) { #remove pairs type F-RRRR or FFFF-R in the case of <R-F> orientation
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1669 &remove($bad,$table);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1670 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1671
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1672 my @falseReads;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1673 #RRRR-F -> RRRR or R-FFFF -> FFFF
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1674 @falseReads = findBadInRFSenseSOLiDSolexa(\@strand1,\@ends_order1,$mate_sense, keys %{$table});
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1675 #these pairs will be recycled later as $secondTable
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1676 for my $bad (@falseReads) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1677 &remove($bad,$table);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1678 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1679
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1680 my $bad = &check($table);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1681 while ($bad ne "OK") { #clear the table to reject non adequate pairs in the sense of ORDER
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1682 # push (@falseReads, $bad); remove completely!!!
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1683 &remove($bad,$table);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1684 $bad = &check($table);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1685 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1686
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1687 $reads_left = scalar keys %{$table};
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1688 my $coord_start_chr1_cluster1 = min(min(@positions1[sort {$a<=>$b} keys %{$table}]),min(@ends1[sort {$a<=>$b} keys %{$table}]));
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1689 my $coord_end_chr1_cluster1 = max(max(@positions1[sort {$a<=>$b} keys %{$table}]),max(@ends1[sort {$a<=>$b} keys %{$table}]));
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1690 my $coord_start_chr2_cluster1 = min(min(@positions2[sort {$a<=>$b} keys %{$table}]),min(@ends2[sort {$a<=>$b} keys %{$table}]));
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1691 my $coord_end_chr2_cluster1 = max(max(@positions2[sort {$a<=>$b} keys %{$table}]),max(@ends2[sort {$a<=>$b} keys %{$table}]));
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1692
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1693 $clusterCoordinates_chr1 = '('.$coord_start_chr1_cluster1.','.$coord_end_chr1_cluster1.')';
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1694 $clusterCoordinates_chr2 = '('.$coord_start_chr2_cluster1.','.$coord_end_chr2_cluster1.')';
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1695
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1696 my $ifBalanced = 'UNBAL';
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1697 my $secondTable;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1698 my $clusterCoordinates;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1699 my ($break_pont_chr1,$break_pont_chr2);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1700
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1701 my $signatureType="";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1702
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1703 my $maxCoord1 =$chr->{$chr1}->{length};
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1704 my $maxCoord2 =$chr->{$chr2}->{length};
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1705
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1706 if (scalar @falseReads) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1707 @falseReads = sort @falseReads;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1708 #now delete FRFR choosing the majority
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1709 my @newfalseReads; #find and remove pairs type RRRR-F or R-FFFF
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1710 @newfalseReads = findBadInRFSenseSOLiDSolexa(\@strand1,\@ends_order1,$mate_sense,@falseReads); #these @newfalseReads won't be recycled
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1711 my %hashTmp;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1712 for my $count1 (0..scalar(@falseReads)-1) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1713 my $i = $falseReads[$count1];
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1714 $hashTmp{$i} = 1;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1715 for my $bad (@newfalseReads) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1716 if ($bad == $i) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1717 delete $hashTmp{$i};
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1718 next;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1719 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1720 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1721 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1722 @falseReads = sort keys %hashTmp; #what is left
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1723 for my $count1 (0..scalar(@falseReads)-1) { #fill the table for reads which were previously rejected
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1724 my $nonAdeq = 0;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1725 my $i = $falseReads[$count1];
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1726
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1727 for my $count2 (0..$count1-1) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1728 my $j = $falseReads[$count2];
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1729 if (exists($secondTable->{$j}->{$i})) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1730 $nonAdeq++;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1731 $secondTable->{$i}->{$j} = 1;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1732 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1733 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1734 for my $count2 ($count1+1..scalar(@falseReads)-1) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1735 my $j = $falseReads[$count2];
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1736 if ($positions1[$j]-$positions1[$i]>$alphaDist) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1737 if (&reversed ($i,$j,$ifRenv,\@positions2)) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1738 $nonAdeq++;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1739 $secondTable->{$i}->{$j} = 1;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1740 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1741 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1742 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1743 $secondTable->{$i}->{nonAdeq} = $nonAdeq;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1744 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1745
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1746 my @falseReads2;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1747 my $bad = &check($secondTable);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1748 while ($bad ne "OK") { #clear the table to reject non adequate pairs
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1749 push (@falseReads2, $bad);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1750 &remove($bad,$secondTable);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1751 $bad = &check($secondTable);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1752 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1753 if (scalar keys %{$secondTable} >= $nb_pairs_order_threshold) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1754 my $coord_start_chr1_cluster2 = min(min(@positions1[sort {$a<=>$b} keys %{$secondTable}]),min(@ends1[sort {$a<=>$b} keys %{$secondTable}]));
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1755 my $coord_end_chr1_cluster2 = max(max(@positions1[sort {$a<=>$b} keys %{$secondTable}]),max(@ends1[sort {$a<=>$b} keys %{$secondTable}]));
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1756 my $coord_start_chr2_cluster2 = min(min(@positions2[sort {$a<=>$b} keys %{$secondTable}]),min(@ends2[sort {$a<=>$b} keys %{$secondTable}]));
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1757 my $coord_end_chr2_cluster2 = max(max(@positions2[sort {$a<=>$b} keys %{$secondTable}]),max(@ends2[sort {$a<=>$b} keys %{$secondTable}]));
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1758
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1759 $ifBalanced = 'BAL';
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1760
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1761 if ($ifBalanced eq 'BAL') {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1762
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1763 if (scalar keys %{$table} < $nb_pairs_order_threshold) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1764 $ifBalanced = 'UNBAL'; #kill cluster 1!
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1765 ($table,$secondTable)=($secondTable,$table); #this means that one needs to exchange cluster1 with cluster2
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1766 $reads_left = scalar keys %{$table};
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1767 $coord_start_chr1_cluster1 = $coord_start_chr1_cluster2;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1768 $coord_end_chr1_cluster1 = $coord_end_chr1_cluster2;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1769 $coord_start_chr2_cluster1 = $coord_start_chr2_cluster2;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1770 $coord_end_chr2_cluster1 = $coord_end_chr2_cluster2;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1771 $clusterCoordinates_chr1 = '('.$coord_start_chr1_cluster1.','.$coord_end_chr1_cluster1.')';
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1772 $clusterCoordinates_chr2 = '('.$coord_start_chr2_cluster1.','.$coord_end_chr2_cluster1.')';
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1773
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1774 } else {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1775
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1776 $reads_left += scalar keys %{$secondTable};
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1777 next if ($reads_left < $nb_pairs_threshold);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1778
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1779 if ($coord_end_chr1_cluster2 < $coord_start_chr1_cluster1) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1780 ($table,$secondTable)=($secondTable,$table); #this means that one needs to exchange cluster1 with cluster2
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1781
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1782 ($coord_start_chr1_cluster1,$coord_start_chr1_cluster2) = ($coord_start_chr1_cluster2,$coord_start_chr1_cluster1);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1783 ($coord_end_chr1_cluster1,$coord_end_chr1_cluster2)=($coord_end_chr1_cluster2,$coord_end_chr1_cluster1);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1784 ($coord_start_chr2_cluster1,$coord_start_chr2_cluster2)=($coord_start_chr2_cluster2,$coord_start_chr2_cluster1);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1785 ($coord_end_chr2_cluster1 , $coord_end_chr2_cluster2)=($coord_end_chr2_cluster2 , $coord_end_chr2_cluster1);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1786
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1787 $clusterCoordinates_chr1 = '('.$coord_start_chr1_cluster1.','.$coord_end_chr1_cluster1.'),'.$clusterCoordinates_chr1;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1788 $clusterCoordinates_chr2 = '('.$coord_start_chr2_cluster1.','.$coord_end_chr2_cluster1.'),'.$clusterCoordinates_chr2;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1789 }else {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1790 $clusterCoordinates_chr1 .= ',('.$coord_start_chr1_cluster2.','.$coord_end_chr1_cluster2.')';
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1791 $clusterCoordinates_chr2 .= ',('.$coord_start_chr2_cluster2.','.$coord_end_chr2_cluster2.')';
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1792 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1793 $coord_start_chr1 = min($coord_start_chr1_cluster1,$coord_start_chr1_cluster2);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1794 $coord_end_chr1 = max($coord_end_chr1_cluster1,$coord_end_chr1_cluster2);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1795 $coord_start_chr2 = min($coord_start_chr2_cluster1,$coord_start_chr2_cluster2);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1796 $coord_end_chr2 = max($coord_end_chr2_cluster1,$coord_end_chr2_cluster2);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1797 #to calculate breakpoints one needs to take into account read orientation in the cluster
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1798 my $leftLetterOk = substr($mate_sense, 0, 1); #R
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1799 my $rightLetterOk = substr($mate_sense, 1, 1); #F
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1800
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1801
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1802 my @index1 = keys %{$table};
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1803 my @index2 = keys %{$secondTable};
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1804
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1805 my (@generalStrand1,@generalStrand2) = 0;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1806
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1807 if ($leftLetterOk eq $rightLetterOk) { #SOLID mate-pairs
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1808 $leftLetterOk = 'R';
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1809 $rightLetterOk = 'F';
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1810 @generalStrand1 = translateSolidToRF(\@strand1,\@ends_order1);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1811 @generalStrand2 = translateSolidToRF(\@strand2,\@ends_order2);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1812 } else {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1813 @generalStrand1 = @strand1;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1814 @generalStrand2 = @strand2; # TODO check if it is correct
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1815 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1816 if ($generalStrand1[$index1[0]] eq $leftLetterOk && $generalStrand1[$index2[0]] eq $rightLetterOk) { #(R,F)
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1817 $break_pont_chr1 = '('.$coord_end_chr1_cluster1.','.$coord_start_chr1_cluster2.')';
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1818
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1819 if ($generalStrand2[$index1[0]] eq $rightLetterOk && $generalStrand2[$index2[0]] eq $leftLetterOk) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1820 if ($coord_end_chr2_cluster1 >= $coord_end_chr2_cluster2) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1821 $break_pont_chr2 = '('.$coord_end_chr2_cluster2.','.$coord_start_chr2_cluster1.')';
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1822 $signatureType = "TRANSLOC";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1823 } else {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1824 $break_pont_chr2 = '('.max(($coord_end_chr2_cluster1-$maxFragmentLength),1).','.$coord_start_chr2_cluster1.')';
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1825 $break_pont_chr2 .= ',('.$coord_end_chr2_cluster2.','.min(($coord_start_chr2_cluster2+$maxFragmentLength),$maxCoord2).')';
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1826 $signatureType = "INS_FRAGMT";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1827 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1828
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1829 } elsif ($generalStrand2[$index1[0]] eq $leftLetterOk && $generalStrand2[$index2[0]] eq $rightLetterOk) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1830 if ($coord_end_chr2_cluster1 >= $coord_end_chr2_cluster2) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1831 $break_pont_chr2 = '('.max(($coord_end_chr2_cluster2-$maxFragmentLength),1).','.$coord_start_chr2_cluster2.')';
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1832 $break_pont_chr2 .= ',('.$coord_end_chr2_cluster1.','.min(($coord_start_chr2_cluster1+$maxFragmentLength),$maxCoord2).')';
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1833 $signatureType = "INV_INS_FRAGMT";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1834 } else {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1835 $break_pont_chr2 = '('.$coord_end_chr2_cluster1.','.$coord_start_chr2_cluster2.')';
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1836 $signatureType = "INV_TRANSLOC";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1837 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1838 } else {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1839 #should not occur
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1840 print STDERR "\nError in orderFiltering\n\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1841 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1842 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1843
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1844 elsif ($generalStrand1[$index1[0]] eq $rightLetterOk && $generalStrand1[$index2[0]] eq $leftLetterOk) { #(F,R)
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1845 $break_pont_chr1 = '('.max(($coord_end_chr1_cluster1-$maxFragmentLength),1).','.$coord_start_chr1_cluster1.')';
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1846 $break_pont_chr1 .= ',('.$coord_end_chr1_cluster2.','.min(($coord_start_chr1_cluster2+$maxFragmentLength),$maxCoord1).')';
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1847 if ($generalStrand2[$index1[0]] eq $rightLetterOk && $generalStrand2[$index2[0]] eq $leftLetterOk) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1848 if ($coord_end_chr2_cluster1 >= $coord_end_chr2_cluster2) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1849 $break_pont_chr2 = '('.$coord_end_chr2_cluster2.','.$coord_start_chr2_cluster1.')';
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1850 $signatureType = "INV_INS_FRAGMT";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1851 } else {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1852 $break_pont_chr2 = '('.max(($coord_end_chr2_cluster1-$maxFragmentLength),1).','.$coord_start_chr2_cluster1.')';
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1853 $break_pont_chr2 .= ',('.$coord_end_chr2_cluster2.','.min(($coord_start_chr2_cluster2+$maxFragmentLength),$maxCoord2).')';
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1854 $signatureType = "INV_COAMPLICON";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1855 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1856
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1857 } elsif ($generalStrand2[$index1[0]] eq $leftLetterOk && $generalStrand2[$index2[0]] eq $rightLetterOk) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1858 if ($coord_end_chr2_cluster1 >= $coord_end_chr2_cluster2) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1859 $break_pont_chr2 = '('.max(($coord_end_chr2_cluster2-$maxFragmentLength),1).','.$coord_start_chr2_cluster2.')';
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1860 $break_pont_chr2 .= ',('.$coord_end_chr2_cluster1.','.min(($coord_start_chr2_cluster1+$maxFragmentLength),$maxCoord2).')';
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1861 $signatureType = "COAMPLICON";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1862 } else {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1863 $break_pont_chr2 = '('.$coord_end_chr2_cluster1.','.$coord_start_chr2_cluster2.')';
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1864 $signatureType = "INS_FRAGMT";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1865 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1866 } else {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1867 #should not occur
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1868 $signatureType = "UNDEFINED";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1869 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1870 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1871 else { # (F,F) or (R,R) something strange. We will discard the smallest cluster
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1872 $ifBalanced = 'UNBAL';
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1873 if (scalar keys %{$secondTable} > scalar keys %{$table}) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1874 ($table,$secondTable)=($secondTable,$table); #this means that one needs to exchange cluster1 with cluster2
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1875
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1876 $coord_start_chr1_cluster1 = $coord_start_chr1_cluster2;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1877 $coord_end_chr1_cluster1 = $coord_end_chr1_cluster2;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1878 $coord_start_chr2_cluster1 = $coord_start_chr2_cluster2;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1879 $coord_end_chr2_cluster1 = $coord_end_chr2_cluster2;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1880 $clusterCoordinates_chr1 = '('.$coord_start_chr1_cluster1.','.$coord_end_chr1_cluster1.')';
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1881 $clusterCoordinates_chr2 = '('.$coord_start_chr2_cluster1.','.$coord_end_chr2_cluster1.')';
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1882 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1883 $reads_left = scalar keys %{$table};
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1884 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1885 if ($ifBalanced eq 'BAL') {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1886 $ifRenv = $signatureType;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1887 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1888 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1889 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1890 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1891 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1892 if ($ifBalanced ne 'BAL') {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1893 #define possible break point
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1894 $coord_start_chr1 = $coord_start_chr1_cluster1;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1895 $coord_end_chr1 = $coord_end_chr1_cluster1;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1896 $coord_start_chr2 = $coord_start_chr2_cluster1;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1897 $coord_end_chr2 = $coord_end_chr2_cluster1;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1898
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1899 my $region_length_chr1 = $coord_end_chr1-$coord_start_chr1;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1900 my $region_length_chr2 = $coord_end_chr2-$coord_start_chr2;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1901
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1902 my $leftLetterOk = substr($mate_sense, 0, 1); #R
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1903 my $rightLetterOk = substr($mate_sense, 1, 1); #F
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1904
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1905 my @index = keys %{$table};
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1906 unless ($diff_sense_ends) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1907 my $firstEndOrder1 = $ends_order1[$index[0]];
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1908 my $firstEndOrder2 = $ends_order2[$index[0]];
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1909 $break_pont_chr1 = (($strand1[$index[0]] eq 'R' && $firstEndOrder1 == 2) || ($strand1[$index[0]] eq 'F' && $firstEndOrder1 == 1))?'('.$coord_end_chr1.','.min(($coord_start_chr1+$maxFragmentLength),$maxCoord1).')':'('.max(($coord_end_chr1-$maxFragmentLength),1).','.$coord_start_chr1.')';
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1910 $break_pont_chr2 = (($strand2[$index[0]] eq 'R' && $firstEndOrder2 == 2) || ($strand2[$index[0]] eq 'F' && $firstEndOrder2 == 1))?'('.$coord_end_chr2.','.min(($coord_start_chr2+$maxFragmentLength),$maxCoord2).')':'('.max(($coord_end_chr2-$maxFragmentLength),1).','.$coord_start_chr2.')';
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1911 } else {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1912 $break_pont_chr1 = ($strand1[$index[0]] eq $leftLetterOk )?'('.$coord_end_chr1.','.min(($coord_start_chr1+$maxFragmentLength),$maxCoord1).')':'('.max(($coord_end_chr1-$maxFragmentLength),1).','.$coord_start_chr1.')';
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1913 $break_pont_chr2 = ($strand2[$index[0]] eq $leftLetterOk )?'('.$coord_end_chr2.','.min(($coord_start_chr2+$maxFragmentLength),$maxCoord2).')':'('.max(($coord_end_chr2-$maxFragmentLength),1).','.$coord_start_chr2.')';
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1914 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1915
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1916 if ($chr1 ne $chr2){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1917 $ifRenv="INV_TRANSLOC" if($ifRenv eq "REVERSE_SENSE");
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1918 $ifRenv="TRANSLOC" if($ifRenv eq "NORMAL_SENSE");
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1919 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1920 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1921
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1922 if (($ifBalanced eq 'BAL')&&( (scalar keys %{$table}) + (scalar keys %{$secondTable}) < $nb_pairs_threshold)) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1923 next; #discard the link
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1924 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1925 if (($ifBalanced eq 'UNBAL')&&(scalar keys %{$table} < $nb_pairs_threshold)) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1926 next; #discard the link
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1927 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1928 my $ratioTxt = "$reads_left/".(scalar @pairs);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1929 my ($n1,$nTot) = split ("/",$strand_ratio_filtering);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1930 my $ratioReal = $reads_left/$nTot;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1931
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1932 if ($coord_start_chr1<=0) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1933 $coord_start_chr1=1;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1934 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1935 if ($coord_start_chr2<=0) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1936 $coord_start_chr2=1;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1937 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1938 #create output
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1939 my @link=($chr->{$chr1}->{name}, $coord_start_chr1 , $coord_end_chr1, #all information output
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1940 $chr->{$chr2}->{name}, $coord_start_chr2 , $coord_end_chr2,
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1941 $reads_left,
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1942 &redraw(1,$table,$secondTable,\%badInFRSense,$ifBalanced,\@pairs),
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1943 &redraw(1,$table,$secondTable,\%badInFRSense,$ifBalanced,\@strand1),
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1944 &redraw(1,$table,$secondTable,\%badInFRSense,$ifBalanced,\@strand2),
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1945 &redraw(1,$table,$secondTable,\%badInFRSense,$ifBalanced,\@ends_order1),
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1946 &redraw(1,$table,$secondTable,\%badInFRSense,$ifBalanced,\@ends_order2),
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1947 &redraw(2,$table,$secondTable,\%badInFRSense,$ifBalanced,\@order1),
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1948 &redraw(2,$table,$secondTable,\%badInFRSense,$ifBalanced,\@order2),
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1949 &redraw(1,$table,$secondTable,\%badInFRSense,$ifBalanced,\@positions1),
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1950 &redraw(1,$table,$secondTable,\%badInFRSense,$ifBalanced,\@positions2),
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1951 $ifRenv,
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1952 $strand_ratio_filtering,
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1953 $ifBalanced, $ratioTxt, $break_pont_chr1, $break_pont_chr2,
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1954 $ratioReal, $nTot);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1955
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1956 $nb_links++;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1957 print $fh2 join("\t",@link)."\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1958
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1959 if($record>=$warn){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1960 print LOG "-- $fchr : $warn links analysed - $nb_links links kept\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1961 $warn+=10000;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1962 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1963
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1964 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1965 $fh->close;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1966 $fh2->close;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1967
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1968 print LOG "-- $fchr : Total : $record links analysed - $nb_links links kept\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1969
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1970 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1971 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1972 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Fills @$ends with the end coordinate of every read, computed by getEnd()
# from the read's start position, strand and end order.
#
#   $ends       - array ref receiving the results (written in place)
#   $starts     - array ref of start positions; its indices drive the loop
#   $strand     - array ref of strand letters, parallel to @$starts
#   $end_order  - array ref of end orders, parallel to @$starts
#   $tag_length - hash ref mapping an end order to its tag length
#
# Entries of @$ends beyond the last index of @$starts are left untouched.
sub getEnds {
    my ($ends, $starts, $strand, $end_order, $tag_length) = @_;

    foreach my $idx (0 .. $#{$starts}) {
        my $read_start  = $starts->[$idx];
        my $read_strand = $strand->[$idx];
        my $read_order  = $end_order->[$idx];

        $ends->[$idx] = getEnd($read_start, $read_strand, $read_order, $tag_length);
    }
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Returns the coordinate of the far end of a single read.
#
#   $start      - start position of the read
#   $strand     - strand letter; 'F' extends the read towards higher
#                 coordinates, any other value towards lower coordinates
#   $end_order  - key used to look up the read length in %$tag_length
#   $tag_length - hash ref mapping an end order to its tag length
#
# The start position itself counts as the first base, hence the -1 / +1.
sub getEnd {
    my ($start, $strand, $end_order, $tag_length) = @_;

    my $length = $tag_length->{$end_order};

    if ($strand eq 'F') {
        return $start + $length - 1;
    }
    return $start - $length + 1;
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1984 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
1985 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Converts read positions into (start, end) pairs in which the start is
# always the leftmost coordinate, whatever the strand.
#
#   $starts     - array ref receiving the leftmost coordinate of each read
#   $ends       - array ref receiving the rightmost coordinate of each read
#   $positions  - array ref of read positions; its indices drive the loop
#   $strand     - array ref of strand letters, parallel to @$positions
#   $end_order  - array ref of end orders, parallel to @$positions
#   $tag_length - hash ref mapping an end order to its tag length
#
# For an 'F' read the position is the left edge and the read extends to the
# right; for any other strand the position is the right edge and the read
# extends to the left. Results are written in place into @$starts and @$ends.
sub getCoordswithLeftMost {
    my ($starts, $ends, $positions, $strand, $end_order, $tag_length) = @_;

    foreach my $idx (0 .. $#{$positions}) {
        my $length = $tag_length->{ $end_order->[$idx] };

        if ($strand->[$idx] ne 'F') {
            # Position marks the right edge of the read.
            $starts->[$idx] = $positions->[$idx] - $length + 1;
            $ends->[$idx]   = $positions->[$idx];
        }
        else {
            # Position marks the left edge of the read.
            $starts->[$idx] = $positions->[$idx];
            $ends->[$idx]   = $positions->[$idx] + $length - 1;
        }
    }
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2002 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2003 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2004 sub addInsertionInfo { #add field with INS,DEL,NA and distance between clusters and performs filtering
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2005
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2006 my ($chr,$chrID,$nb_pairs_threshold,$order_filtering,$indel_sigma_threshold,$dup_sigma_threshold,$singleton_sigma_threshold,$mu,$sigma,$mate_sense,$tag_length,$links_file,$flinks_file)=@_;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2007
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2008 my @sfile=split(/\./,$links_file);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2009 my $fchr=$sfile[$#sfile-2];
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2010
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2011
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2012 my $diff_sense_ends=(($mate_sense eq "FR") || ($mate_sense eq "RF"))? 1:0;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2013
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2014 my $record=0;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2015 my $nb_links=0;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2016 my $warn=10000;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2017
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2018 print LOG "\# Filtering out normal pairs using insert size...\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2019 print LOG "-- mu length=$mu, sigma length=$sigma, indel sigma threshold=$indel_sigma_threshold, dup sigma threshold=$dup_sigma_threshold\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2020 print LOG "-- using ".($mu-$indel_sigma_threshold*$sigma)."-".
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2021 ($mu+$indel_sigma_threshold*$sigma)." as normal range of insert size for indels\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2022 print LOG "-- using ".($mu-$dup_sigma_threshold*$sigma)."-".
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2023 ($mu+$dup_sigma_threshold*$sigma)." as normal range of insert size for duplications\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2024 print LOG "-- using ".($mu-$singleton_sigma_threshold*$sigma)." as the upper limit of insert size for singletons\n" if($mate_sense eq "RF");
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2025
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2026 my $fh = new FileHandle;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2027 my $fh2 = new FileHandle;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2028
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2029 $fh->open("<$links_file") or die "$0: can't open $links_file :$!\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2030 $fh2->open(">$flinks_file") or die "$0: can't write in the output: $flinks_file :$!\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2031
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2032 while(<$fh>){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2033
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2034 $record++;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2035 my @t = split;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2036 my ($chr1,$chr2,$mates_list)=@t[0,3,7];
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2037
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2038 if($chrID->{$chr1} ne $chrID->{$chr2}) { #if inter-chromosomal link here (because sv_type=all),
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2039 $nb_links++;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2040
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2041 $t[16]="INV_TRANSLOC" if($t[16] eq "REVERSE_SENSE");
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2042 $t[16]="TRANSLOC" if($t[16] eq "NORMAL_SENSE");
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2043
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2044 $t[16].= "\t";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2045 $t[19].= "\t";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2046
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2047 print $fh2 join("\t",@t)."\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2048
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2049 if($record>=$warn){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2050 print LOG "-- $fchr : $warn links processed - $nb_links links kept\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2051 $warn+=10000;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2052 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2053 next;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2054 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2055
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2056 my $ifRenv = $t[16];
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2057 my $ifBalanced = "UNBAL";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2058 $ifBalanced = $t[18] if ($order_filtering);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2059
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2060 my $numberOfPairs = $t[6];
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2061 my @positions1 = deleteBadOrderSensePairs(split (/,/,$t[14]));
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2062 my @positions2 = deleteBadOrderSensePairs(split (/,/,$t[15]));
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2063
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2064 if ($ifBalanced eq "BAL") {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2065
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2066 if ($ifRenv eq "INV_TRANSLOC") {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2067 $ifRenv = "INV_FRAGMT"; #for intrachromosomal inverted translocation is the same as inverted fragment
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2068 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2069 if ($ifRenv eq "NORMAL_SENSE") {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2070 $ifRenv = "TRANSLOC";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2071 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2072 if ($ifRenv eq "REVERSE_SENSE") {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2073 $ifRenv = "INV_FRAGMT"; #for intrachromosomal inverted translocation is the same as inverted fragment
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2074 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2075 $t[19].= "\t";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2076
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2077 my $meanDistance = 0;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2078
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2079 for my $i (0..$numberOfPairs-1) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2080 $meanDistance += $positions2[$i]-$positions1[$i];
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2081 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2082 $meanDistance /= $numberOfPairs;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2083
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2084 $t[16] = $ifRenv."\t".$meanDistance;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2085 #dont touch the annotation. It should be already OK.
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2086
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2087 } else {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2088 #only for unbalanced
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2089
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2090 my $ifoverlap=overlap($t[1],$t[2],$t[4],$t[5]);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2091
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2092 my $ends_sense_class = (deleteBadOrderSensePairs(split (/,/,$t[8])))[0].
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2093 (deleteBadOrderSensePairs(split (/,/,$t[9])))[0];
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2094 my $ends_order_class = (deleteBadOrderSensePairs(split (/,/,$t[10])))[0].
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2095 (deleteBadOrderSensePairs(split (/,/,$t[11])))[0];
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2096
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2097 my $indel_type = $ifRenv;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2098
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2099 my $meanDistance = "N/A";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2100
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2101 ($meanDistance, $indel_type) = checkIndel ($numberOfPairs, #identify insertion type for rearrangments without inversion, calculates distance between cluster
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2102 \@positions1, #assign N/A to $indel_type if unknown
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2103 \@positions2,
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2104 $ifRenv,
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2105 $ifoverlap,
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2106 $indel_sigma_threshold,
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2107 $dup_sigma_threshold,
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2108 $singleton_sigma_threshold,
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2109 $mu,
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2110 $sigma,
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2111 $ifBalanced,
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2112 $ends_sense_class,
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2113 $ends_order_class,
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2114 $mate_sense,
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2115 $diff_sense_ends,
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2116 );
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2117
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2118 #filtering of pairs with distance inconsistant with the SV
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2119 if ($ifRenv ne "REVERSE_SENSE") {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2120 my $maxCoord1 =$chr->{$chrID->{$chr1}}->{length};
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2121 my $maxCoord2 =$chr->{$chrID->{$chr2}}->{length};
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2122 $meanDistance = recalc_t_usingInsertSizeInfo(\@t,$mu,$sigma,$meanDistance,$tag_length,$diff_sense_ends,$mate_sense,
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2123 $maxCoord1,$maxCoord2,$ends_sense_class,$ends_order_class,$nb_pairs_threshold,$order_filtering);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2124 next if ($t[6] < $nb_pairs_threshold);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2125 }else{
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2126 $t[19].= "\t";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2127 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2128 $t[16] = $indel_type."\t".$meanDistance;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2129 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2130
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2131 $nb_links++;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2132
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2133 print $fh2 join("\t",@t)."\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2134 if($record>=$warn){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2135 print LOG "-- $fchr : $warn links processed - $nb_links links kept\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2136 $warn+=10000;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2137 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2138 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2139 $fh->close;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2140 $fh2->close;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2141
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2142 print LOG "-- $fchr : Total : $record links analysed - $nb_links links kept\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2143
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2144 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2145 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2146 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# checkIndel - classify a link from the mean distance between its paired ends.
#
# Arguments (positional):
#   $numberOfPairs               number of pairs to average over
#   $positions1, $positions2     array refs; the distance of pair $i is
#                                $positions2->[$i] - $positions1->[$i]
#   $ifRenv                      sense class of the link; "REVERSE_SENSE"
#                                selects the inversion classification
#   $ifoverlap                   true value required for "SMALL_DUPLI"
#   $indel_sigma_threshold,
#   $dup_sigma_threshold,
#   $singleton_sigma_threshold   multipliers applied to $sigma around $mu
#   $mu, $sigma                  centre and spread of the expected distance
#                                (presumably insert-size mean / std dev --
#                                confirm against the caller)
#   $ifBalanced                  accepted for interface compatibility, unused
#   $ends_sense_class,
#   $ends_order_class            sense ("FR", ...) and order ("12"/"21") of
#                                the ends of the link
#   $mate_sense                  expected sense of a pair
#   $diff_sense_ends             true when the two ends are expected to have
#                                different senses (selects the rule set)
#
# Returns the list ($meanDistance, $type) where $type is one of INV_DUPLI,
# INVERSION, LARGE_DUPLI, SINGLETON, SMALL_DUPLI, DUPLICATION, INSERTION,
# DELETION or UNDEFINED. The rules are tried in that order and the first
# match wins. When there is no pair to average, ("N/A", "UNDEFINED") is
# returned instead of dying on a division by zero.
sub checkIndel {

    my ($numberOfPairs, $positions1, $positions2, $ifRenv, $ifoverlap,
        $indel_sigma_threshold, $dup_sigma_threshold, $singleton_sigma_threshold,
        $mu, $sigma, $ifBalanced, $ends_sense_class, $ends_order_class,
        $mate_sense, $diff_sense_ends) = @_;

    # A mean over zero pairs is undefined: report it as such rather than
    # aborting the whole run with "Illegal division by zero".
    return ("N/A", "UNDEFINED") if (!defined($numberOfPairs) || $numberOfPairs == 0);

    my $meanDistance = 0;
    for my $i (0..$numberOfPairs-1) {
        $meanDistance += $positions2->[$i]-$positions1->[$i];
    }
    $meanDistance /= $numberOfPairs;

    # Bounds shared by all the duplication rules below.
    my $dup_upper = $mu+$dup_sigma_threshold*$sigma;
    my $dup_lower = $mu-$dup_sigma_threshold*$sigma;

    # Reversed links are either an inverted duplication or an inversion.
    if ($ifRenv eq "REVERSE_SENSE") {
        return ($meanDistance, "INV_DUPLI") if ($meanDistance<$dup_upper);
        return ($meanDistance, "INVERSION");
    }

    if ($diff_sense_ends) {
        return ($meanDistance, "LARGE_DUPLI")
            if (($ends_sense_class ne $mate_sense) && ($meanDistance>$dup_upper));
        return ($meanDistance, "SINGLETON")
            if (($meanDistance<$mu-$singleton_sigma_threshold*$sigma)
                && $mate_sense eq "RF"
                && ($ends_sense_class eq inverseSense($mate_sense)));
    } else {
        return ($meanDistance, "LARGE_DUPLI")
            if ((($ends_sense_class eq $mate_sense) && ($ends_order_class eq "12")
                 || ($ends_sense_class eq inverseSense($mate_sense)) && ($ends_order_class eq "21"))
                && ($meanDistance>$dup_upper));
    }

    return ($meanDistance, "SMALL_DUPLI") if (($meanDistance<$dup_lower) && $ifoverlap);

    return ($meanDistance, "DUPLICATION")
        if ($diff_sense_ends && ($ends_sense_class ne $mate_sense) && ($meanDistance<$dup_lower));

    return ($meanDistance, "INSERTION") if ($meanDistance<$mu-$indel_sigma_threshold*$sigma);
    return ($meanDistance, "DELETION") if ($meanDistance>$mu+$indel_sigma_threshold*$sigma);

    return ($meanDistance, "UNDEFINED");
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2180 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2181 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Recalculates the link record @t after discarding the pairs whose distance is
# inconsistent with the mean distance of the link.
#
# Arguments (positional):
#   $t                    array ref to the fields of one link; modified in place
#   $mu, $sigma           reference distance and its spread
#   $meanDistance         current mean distance of the link
#   $tag_length           forwarded to recalculate_boundaries
#   $diff_sense_ends, $mate_sense, $maxCoord1, $maxCoord2,
#   $ends_sense_class, $ends_order_class
#                         forwarded to recalc_breakpoints
#   $nb_pairs_threshold   minimal number of pairs a link must keep
#   $order_filtering      true selects the field layout that has the extra
#                         order-filtering columns (19..23 instead of 17..19)
#
# A pair is inconsistent when it lies on the other side of $mu than the mean:
# distance >= $mu while the mean is below $mu, or distance <= $mu otherwise.
# Pairs whose second position already ends with '$' or '*' are not examined.
#
# Returns the (possibly recomputed) mean distance. Returns nothing when the
# number of remaining pairs ($t->[6]) drops below $nb_pairs_threshold; in that
# case only $t->[6] has been updated.
sub recalc_t_usingInsertSizeInfo {
    my($t,$mu,$sigma,$meanDistance,$tag_length,$diff_sense_ends,$mate_sense,$maxCoord1,$maxCoord2,$ends_sense_class,$ends_order_class,$nb_pairs_threshold,$order_filtering) = @_;

    my @starts1 = getAllEntries($t->[14]);
    my @starts2 = getAllEntries($t->[15]);

    # Collect the indexes of the pairs lying on the wrong side of $mu.
    my $mean_below_mu = ($meanDistance < $mu);
    my @badPairs;
    for my $i (0 .. $#starts1) {
        next if (substr($starts2[$i],-1,1) eq '$' || substr($starts2[$i],-1,1) eq '*');
        my $distance = $starts2[$i]-$starts1[$i];
        if ($mean_below_mu ? ($distance >= $mu) : ($distance <= $mu)) {
            push(@badPairs,$i);
        }
    }

    # Nothing has been filtered: only duplicate the ratio column.
    if (scalar(@badPairs) == 0) {
        if ($order_filtering) {
            $t->[19] = recalculate_ratio((split(/\//,$t->[19]))[0],$t->[19]);
        } else {
            $t->[17] = recalculate_ratio((split(/\//,$t->[17]))[0],$t->[17]);
        }
        return $meanDistance;
    }

    # Remove the inconsistent pairs from the pair count.
    $t->[6] -= scalar(@badPairs);
    return if ($t->[6] < $nb_pairs_threshold);

    # Flag the discarded pairs in every per-pair column.
    for my $column (7 .. 11) {
        $t->[$column] = mark_values(\@badPairs, $t->[$column]);
    }
    for my $column (12, 13) {
        $t->[$column] = mark_indexes(\@badPairs, $t->[$column]);
    }
    for my $column (14, 15) {
        $t->[$column] = mark_values(\@badPairs, $t->[$column]);
    }

    # Add the second ratio.
    if ($order_filtering) {
        $t->[19] = recalculate_ratio($t->[6],$t->[19]);
    } else {
        $t->[17] = recalculate_ratio($t->[6],$t->[17]);
    }

    # Cluster boundaries of both ends, from the pairs that are left.
    ($t->[1],$t->[2]) = recalculate_boundaries($t->[14],$t->[8],$t->[10],$tag_length);
    ($t->[4],$t->[5]) = recalculate_boundaries($t->[15],$t->[9],$t->[11],$tag_length);

    # Breakpoints: the longest expected fragment is $mu plus $quant001 sigmas.
    my $quant001 = 3.090232;
    my $maxFragmentLength = floor($quant001 * $sigma + $mu);
    if ($order_filtering) {
        $t->[20] = recalc_breakpoints($mate_sense,$maxCoord1,$t->[14],substr($ends_sense_class,0,1),substr($ends_order_class,0,1),$t->[1],$t->[2],$maxFragmentLength,$diff_sense_ends );
        $t->[21] = recalc_breakpoints($mate_sense,$maxCoord2,$t->[15],substr($ends_sense_class,1,1),substr($ends_order_class,1,1),$t->[4],$t->[5],$maxFragmentLength,$diff_sense_ends );
        # Total ratio.
        $t->[22] = $t->[6] / $t->[23];
    } else {
        # Total ratio.
        $t->[18] = $t->[6] / $t->[19];
    }

    # Mean distance over the pairs that survive the filtering.
    my @kept1 = deleteBadOrderSensePairs(split (/,/,$t->[14]));
    my @kept2 = deleteBadOrderSensePairs(split (/,/,$t->[15]));

    my $total = 0;
    for my $i (0 .. $#kept1) {
        $total += $kept2[$i]-$kept1[$i];
    }
    $meanDistance = $total / scalar(@kept1);

    return $meanDistance;
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2253
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Appends a recomputed ratio to a "numerator/denominator" string.
#
#   $left   numerator of the new ratio
#   $ratio  existing ratio string, parts separated by "/"
#
# Returns "$ratio<TAB>$left/<old numerator>": the original string is kept and
# followed by a second ratio whose denominator is the numerator of the first.
sub recalculate_ratio {
    my ($left, $ratio) = @_;

    my @parts = split(/\//, $ratio);
    # The old numerator becomes the denominator, $left the numerator.
    @parts[0, 1] = ($left, $parts[0]);

    return join("\t", $ratio, join("/", @parts));
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2261
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Computes the breakpoint interval of one end of a link, as the string
# "(start,end)".
#
#   $mate_sense          expected sense of a pair; its first letter is the
#                        reference strand when $diff_sense_ends is true
#   $maxCoord            upper limit for the interval end
#   $startString         accepted for interface compatibility, not needed
#   $strand              strand letter of this end ('F' or 'R')
#   $firstEndOrder       order of this end (1 or 2)
#   $coord_start_chr,
#   $coord_end_chr       boundaries of the cluster
#   $maxFragmentLength   longest fragment length to allow for
#   $diff_sense_ends     true when the two ends are expected to have
#                        different senses
#
# The interval lies after the cluster, "(end, min(start+maxFragmentLength,
# maxCoord))", when
#   - $diff_sense_ends is true and $strand is the first letter of $mate_sense, or
#   - $diff_sense_ends is false and ($strand, $firstEndOrder) is (R,2) or (F,1).
# Otherwise it lies before the cluster, "(max(end-maxFragmentLength, 1), start)".
sub recalc_breakpoints {
    my ($mate_sense,$maxCoord,$startString,$strand,$firstEndOrder,$coord_start_chr,$coord_end_chr,$maxFragmentLength,$diff_sense_ends ) = @_;

    my $breakpoint_after_cluster;
    if ($diff_sense_ends) {
        $breakpoint_after_cluster = ($strand eq substr($mate_sense, 0, 1));
    } else {
        $breakpoint_after_cluster = (($strand eq 'R' && $firstEndOrder == 2)
                                  || ($strand eq 'F' && $firstEndOrder == 1));
    }

    if ($breakpoint_after_cluster) {
        return '('.$coord_end_chr.','.min(($coord_start_chr+$maxFragmentLength),$maxCoord).')';
    }
    return '('.max(($coord_end_chr-$maxFragmentLength),1).','.$coord_start_chr.')';
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Recomputes the boundaries of a cluster from its remaining pairs.
#
#   $startString      comma-separated start positions
#   $senseString      comma-separated strands
#   $endsOrderString  comma-separated end orders
#   $tag_length       forwarded to getEnds
#
# The three strings are split on "," and passed through
# deleteBadOrderSensePairs; getEnds then receives an empty array by reference
# (presumably filled with the end coordinate of every read -- confirm in
# getEnds). Returns (lowest, highest) over the start and end coordinates.
sub recalculate_boundaries {
    my ($startString,$senseString,$endsOrderString,$tag_length) = @_;

    my @starts = deleteBadOrderSensePairs(split(/,/, $startString));
    my @senses = deleteBadOrderSensePairs(split(/,/, $senseString));
    my @orders = deleteBadOrderSensePairs(split(/,/, $endsOrderString));

    my @stops;
    getEnds(\@stops, \@starts, \@senses, \@orders, $tag_length);

    my $lowest  = min(min(@starts), min(@stops));
    my $highest = max(max(@starts), max(@stops));

    return ($lowest, $highest);
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2289
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Removes the elements at the given positions from a comma-separated list.
#
#   $bads    positions (0-based) to remove: an array ref of indexes, as taken
#            by mark_values / mark_indexes, or a hash ref whose keys are the
#            indexes (assumed meaning of the hash form -- confirm with callers)
#   $string  comma-separated list; it is split on "," and passed through
#            deleteBadOrderSensePairs before the removal
#
# Returns the remaining elements as "(a,b,c)".
sub remove_indexes {
    my ($bads, $string) = @_;
    my @elements = deleteBadOrderSensePairs(split (/,/,$string));

    my @indexes = (ref($bads) eq 'HASH') ? keys %{$bads} : @{$bads};

    # Remove from the highest index down so that the positions still to be
    # processed are not shifted; every index is handled once.
    my %seen;
    for my $i (sort { $b <=> $a } grep { !$seen{$_}++ } @indexes) {
        next if ($i < 0 || $i >= scalar(@elements));
        splice(@elements, $i, 1);
    }
    return "(".join(",",@elements).")";
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
## Appends "@" to the elements found at the given positions.
#
#   $bads    array ref of 0-based positions to flag
#   $string  list to process, parsed with getAllEntries
#
# Returns the list, flagged elements included, as "(a,b@,c)".
sub mark_values {
    my ($bads, $string) = @_;

    my @entries = getAllEntries($string);
    foreach my $position (@{$bads}) {
        $entries[$position] .= '@';
    }

    return '(' . join(',', @entries) . ')';
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
## Appends "@" to the elements whose value designates a flagged pair.
#
#   $bads    array ref of 0-based positions of the flagged pairs
#   $string  list of pair numbers, parsed with getAllEntries
#
# An element is flagged when it is equal (string comparison) to one of the
# positions plus one. Returns the list as "(1,2@,3)".
sub mark_indexes {
    my ($bads, $string) = @_;

    my @entries = getAllEntries($string);
    foreach my $entry (@entries) {
        foreach my $position (@{$bads}) {
            $entry .= '@' if ($entry eq ($position+1));
        }
    }

    return '(' . join(',', @entries) . ')';
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2319
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2320 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2321 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Rebuilds the printable form of one per-pair column, flagging the pairs that
# were not retained.
#
#   $type          1: $arr holds one value per pair and the lookup key of a
#                     value is its position in $arr;
#                  otherwise: $arr holds pair numbers and the lookup key of a
#                     value is the value minus one
#   $table         hash ref; keys of the retained pairs (first group)
#   $secondTable   hash ref; keys of the pairs of the second group, only
#                  consulted when $ifBalanced is 'BAL'
#   $badInFRSense  hash ref; a pair that is not retained is suffixed with "$"
#                  when its key is present here and with "*" otherwise
#   $ifBalanced    'BAL' produces two groups, anything else one
#   $arr           array ref of the values to print
#
# Returns "(v1,v2,...)" or, for 'BAL', "(first group),(second group)". In the
# 'BAL' case a pair that is not retained is written to the group that received
# the previous retained pair (the first group if there was none yet).
sub redraw {

    my ($type,$table,$secondTable,$badInFRSense,$ifBalanced,$arr) = @_;

    # One [lookup key, text] couple per value, in the order of $arr.
    my @couples;
    if ($type == 1) {
        @couples = map { [ $_, $arr->[$_] ] } (0 .. scalar(@{$arr})-1);
    } else {
        @couples = map { [ $_-1, $_ ] } @{$arr};
    }

    # Suffix of a pair that was not retained.
    my $flag_for = sub {
        my ($key) = @_;
        return exists ($badInFRSense->{$key}) ? "\$" : "*";
    };

    my @first_arr;

    if ($ifBalanced ne 'BAL') {
        for my $couple (@couples) {
            my ($key, $text) = @{$couple};
            $text = $text.$flag_for->($key) unless (exists ($table->{$key}));
            push(@first_arr,$text);
        }
        return '('.join(",",@first_arr).')';
    }

    my @second_arr;
    my $current_group = \@first_arr;    #group of the last retained pair
    for my $couple (@couples) {
        my ($key, $text) = @{$couple};
        if (exists ($table->{$key})) {
            $current_group = \@first_arr;
        } elsif (exists ($secondTable->{$key})) {
            $current_group = \@second_arr;
        } else {
            $text = $text.$flag_for->($key);
        }
        push(@{$current_group},$text);
    }
    return '('.join(",",@first_arr).'),('.join(",",@second_arr).')';
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2408 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2409 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Looks for the entry of $table with the highest "nonAdeq" count.
#
#   $table  hash ref; every value is a hash ref with a numeric {nonAdeq} field
#
# Returns the key with the highest non-zero, positive nonAdeq count; on a tie
# the numerically smallest key wins. Returns 'OK' when no entry has such a
# count.
sub check {

    my ($table) = @_;

    my $worst_key = 'OK';
    my $highest = 0;
    for my $key (sort {$a<=>$b} keys %{$table}) {
        my $count = $table->{$key}->{nonAdeq};
        next if ($count == 0);
        if ($highest < $count) {
            $highest = $count;
            $worst_key = $key;
        }
    }
    return $worst_key;
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2425 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2426 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Tell whether elements $i and $j of @$positions are in the "wrong" order for
# the given sense.
#   $ifRenv eq 'REVERSE_SENSE' : 1 when position[$i] <  position[$j]
#   any other $ifRenv          : 1 when position[$i] >  position[$j]
# Returns 0 otherwise (equal positions always give 0).
sub reversed {
    my ($i, $j, $ifRenv, $positions) = @_;

    my ($pos_i, $pos_j) = @{$positions}[$i, $j];
    if ($ifRenv eq 'REVERSE_SENSE') {
        return $pos_i < $pos_j ? 1 : 0;
    }
    return $pos_i > $pos_j ? 1 : 0;
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2435 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2436 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Remove entry $bad from $table (modified in place) and erase every reference
# to it held by the remaining entries.
#   $bad   : key to eliminate (compared numerically with the table keys)
#   $table : hash ref of hash refs; an entry that has a '$bad' sub-key loses
#            that sub-key and gets its 'nonAdeq' counter decremented by one.
sub remove {
    my ($bad, $table) = @_;

    foreach my $key (sort { $a <=> $b } keys %{$table}) {
        if ($key == $bad) {
            delete $table->{$key};
            next;
        }
        next unless exists $table->{$key}->{$bad};
        delete $table->{$key}->{$bad};
        $table->{$key}->{nonAdeq}--;
    }
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2451 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2452 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Split the indices in @keysLeft by strand letter and return the smaller group
# (the larger group of FFFFs or RRRRs is the one that is kept).
#   $strand     : array ref of strand letters
#   $ends_order : array ref of end numbers, only used for SOLiD translation
#   $mate_sense : two-letter sense string; when both letters are identical the
#                 data are treated as SOLiD mate-pairs and first translated
#                 with translateSolidToRF(), using 'R'/'F' as left/right letters
#   @keysLeft   : indices into the strand array to examine
# Returns the indices matching the right letter when the left-letter group is
# at least as large, otherwise the indices matching the left letter.
sub findBadInRFSenseSOLiDSolexa { #choose maximum: FFFFs or RRRRs
    my ($strand, $ends_order, $mate_sense, @keysLeft) = @_;

    my ($leftLetterOk, $rightLetterOk) = map { substr($mate_sense, $_, 1) } (0, 1);

    my @standardArray;
    if ($leftLetterOk ne $rightLetterOk) {
        @standardArray = @{$strand};
    }
    else {    #SOLID mate-pairs
        ($leftLetterOk, $rightLetterOk) = ('R', 'F');
        @standardArray = translateSolidToRF($strand, $ends_order);
    }

    my @left_hits  = grep { $standardArray[$_] eq $leftLetterOk } @keysLeft;
    my @right_hits = grep { $standardArray[$_] eq $rightLetterOk } @keysLeft;

    return @left_hits >= @right_hits ? @right_hits : @left_hits;
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2495
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2496 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2497 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Decide which pairs break the expected strand pattern at one end of a link.
#
# Four repairs are tried, two per end, each removing every
# "right letter directly followed by left letter" junction:
#   - forward scan : the second element of the junction is flagged and turned
#                    into the right letter
#   - backward scan: the first element of the junction is flagged and turned
#                    into the left letter
# The repair keeping the most elements (array size minus flagged keys) wins;
# ties are resolved in the order: end 1 backward, end 1 forward, end 2 forward,
# end 2 backward.
#
# Arguments: $strand1, $strand2         array refs of strand letters
#            $ends_order1, $ends_order2 array refs of end numbers (SOLiD only)
#            $order1                    accepted but not used here
#            $order2                    array ref of 1-based indices ordering end 2
#            $mate_sense                two-letter sense; identical letters mean
#                                       SOLiD mate-pairs (translated to R/F first)
# Returns a flat list: the end number (1 or 2) followed by the key/value pairs
# of the hash of flagged elements.
sub findBadInFRSenseSOLiDSolexa { #should work both for SOLiD and Solexa
    my ($strand1, $strand2, $ends_order1, $ends_order2, $order1, $order2, $mate_sense) = @_;

    my $leftLetterOk  = substr($mate_sense, 0, 1);    #R
    my $rightLetterOk = substr($mate_sense, 1, 1);    #F

    my (@standardArray1, @standardArray2);
    if ($leftLetterOk ne $rightLetterOk) {
        @standardArray1 = @{$strand1};
        @standardArray2 = getOrderedStrands($strand2, $order2);
    }
    else {    #SOLID mate-pairs
        ($leftLetterOk, $rightLetterOk) = ('R', 'F');
        @standardArray1 = translateSolidToRF($strand1, $ends_order1);
        my @sorted_strands2 = getOrderedStrands($strand2, $order2);
        my @sorted_ends2    = getOrderedStrands($ends_order2, $order2);
        @standardArray2 = translateSolidToRF(\@sorted_strands2, \@sorted_ends2);
    }

    # Forward scan on a private copy, e.g. FRFRFFFF -> FFFFFFFF.
    # $key_for maps the position of a flagged element to its hash key.
    my $flag_forward = sub {
        my ($letters, $key_for) = @_;
        my @work = @{$letters};
        my %flagged;
        foreach my $pos (1 .. $#work) {
            next unless $work[$pos-1] eq $rightLetterOk && $work[$pos] eq $leftLetterOk;
            $flagged{ $key_for->($pos) } = 1;
            $work[$pos] = $rightLetterOk;
        }
        return \%flagged;
    };

    # Backward scan on a private copy, e.g. FRFRFFFFRR -> the leading F's are
    # flagged. Flagged elements are always keyed by their position.
    my $flag_backward = sub {
        my ($letters) = @_;
        my @work = @{$letters};
        my %flagged;
        foreach my $pos (reverse(1 .. $#work)) {
            next unless $work[$pos-1] eq $rightLetterOk && $work[$pos] eq $leftLetterOk;
            $flagged{$pos-1} = 1;
            $work[$pos-1] = $leftLetterOk;
        }
        return \%flagged;
    };

    #we will try 4 possibilities, 2 for each end of the link
    my $badInFRSense1 = $flag_forward->(\@standardArray1, sub { return $_[0] });
    my $badInFRSense0 = $flag_backward->(\@standardArray1);
    # NOTE(review): for end 2 the forward scan keys by $order2->[position] while
    # the backward scan keys by position-1 -- confirm against the caller that
    # this asymmetry is intended.
    my $badInFRSense3 = $flag_forward->(\@standardArray2, sub { return $order2->[$_[0]] });
    my $badInFRSense5 = $flag_backward->(\@standardArray2);

    # [ number of elements kept, end number, flagged hash ] in tie-break order.
    my @candidates = (
        [ scalar(@standardArray1) - scalar(keys %{$badInFRSense0}), 1, $badInFRSense0 ],
        [ scalar(@standardArray1) - scalar(keys %{$badInFRSense1}), 1, $badInFRSense1 ],
        [ scalar(@standardArray2) - scalar(keys %{$badInFRSense3}), 2, $badInFRSense3 ],
        [ scalar(@standardArray2) - scalar(keys %{$badInFRSense5}), 2, $badInFRSense5 ],
    );

    my $best_kept = $candidates[0]->[0];
    foreach my $candidate (@candidates) {
        $best_kept = $candidate->[0] if $candidate->[0] > $best_kept;
    }

    # The first candidate reaching the maximum is the one that is >= all others.
    foreach my $candidate (@candidates) {
        my ($kept, $end, $flagged) = @{$candidate};
        return ($end, %{$flagged}) if $kept == $best_kept;
    }

    #should not get here:
    print STDERR "Error in findBadInFRSenseSOLiDSolexa()!\n";
    return (1, %{$badInFRSense1});
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2587
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Reorder @$strand according to @$order.
#   $strand : array ref of values
#   $order  : array ref of 1-based indices into @$strand
# Returns a list with as many elements as @$strand, whose k-th element is
# $strand->[ $order->[k] - 1 ].
sub getOrderedStrands {
    my ($strand, $order) = @_[0, 1];

    my @arr = map { $strand->[ $order->[$_] - 1 ] } 0 .. $#{$strand};
    return @arr;
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2596 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2597 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Check that two clusters are ordered consistently on both chromosomes.
#   $ifRenv eq 'REVERSE_SENSE' : the chr2 starts must be ordered the same way
#                                as the chr1 starts
#   any other $ifRenv          : the chr2 starts must be ordered the opposite way
# "Ordered" is judged with <= / >=, so equal starts satisfy either direction.
# Returns 1 when the expectation holds, 0 otherwise.
sub checkClusters {
    my ($ifRenv, $coord_start_chr1_cluster1, $coord_start_chr1_cluster2,
        $coord_start_chr2_cluster1, $coord_start_chr2_cluster2) = @_;

    my $is_reverse     = ($ifRenv eq 'REVERSE_SENSE') ? 1 : 0;
    my $chr1_ascending = ($coord_start_chr1_cluster1 <= $coord_start_chr1_cluster2) ? 1 : 0;

    # chr2 has to be ascending exactly when (reverse sense, chr1 ascending)
    # or (normal sense, chr1 descending).
    if ($is_reverse == $chr1_ascending) {
        return ($coord_start_chr2_cluster1 <= $coord_start_chr2_cluster2) ? 1 : 0;
    }
    return ($coord_start_chr2_cluster1 >= $coord_start_chr2_cluster2) ? 1 : 0;
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2613
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2614 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2615 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Translate strand letters into R/F notation using the end number of each read:
# a letter belonging to end 1 is kept, a letter belonging to end 2 is swapped
# (F <-> R).
#   $strandArr     : array ref of 'F' / 'R' letters
#   $ends_orderArr : array ref of end numbers (1 or 2), parallel to $strandArr
# Returns the translated list. An element whose letter is neither 'F' nor 'R',
# or whose end number is neither 1 nor 2, contributes nothing to the result.
sub translateSolidToRF {
    my ($strandArr, $ends_orderArr) = @_;

    my %opposite = (F => 'R', R => 'F');
    my @array;
    foreach my $idx (0 .. $#{$strandArr}) {
        my $end    = $ends_orderArr->[$idx];
        my $letter = $strandArr->[$idx];
        next unless $letter eq 'F' || $letter eq 'R';

        if ($end == 1) {
            push @array, $letter;
        }
        elsif ($end == 2) {
            push @array, $opposite{$letter};
        }
    }
    return @array;
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2635
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2636 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2637 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2638 #convert the links file to the circos format
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Write a circos segment file from a links file.
#   $id          : prefix prepended to every chromosome name in the output
#   $color_code  : hash ref, colour name => "min_links,max_links"
#   $links_file  : input path; columns are whitespace separated and the first
#                  seven (chr1 start1 end1 chr2 start2 end2 count) are used
#   $segdup_file : output path; two lines per link, sharing a running index
# Progress messages go to the global LOG handle. Dies when a file cannot be
# opened, or when the output file cannot be closed (i.e. flushed to disk).
sub links2segdup{

    my ($id, $color_code, $links_file, $segdup_file) = @_;

    print LOG "# Converting to the circos format...\n";

    tie (my %hcolor,'Tie::IxHash');                                             #color-code hash table
    foreach my $col (keys %{$color_code}){
        my ($min_links, $max_links) = split(",", $color_code->{$col});
        $hcolor{$col} = [$min_links, $max_links];
    }

    # Three-argument open with lexical handles: the file names are taken
    # literally (no mode characters or whitespace interpreted) and no
    # package-wide bareword handle is clobbered.
    open(my $links_fh, '<', $links_file)
        or die "$0: can't open $links_file :$!\n";
    open(my $segdup_fh, '>', $segdup_file)
        or die "$0: can't write in the output: $segdup_file :$!\n";

    my $index = 1;
    while (my $line = <$links_fh>) {    # named variable: caller's $_ is left alone

        my ($chr1, $start1, $end1, $chr2, $start2, $end2, $count) = (split(' ', $line))[0 .. 6];

        my $color = getColor($count, \%hcolor, "circos");                       #get the color-code according the number of links

        print {$segdup_fh} "$index\t$id$chr1\t$start1\t$end1\tcolor=$color\n".  #circos output
                           "$index\t$id$chr2\t$start2\t$end2\tcolor=$color\n";
        $index++;
    }

    close $links_fh;
    # Buffered write errors only surface when the handle is closed.
    close $segdup_fh or die "$0: can't close $segdup_file :$!\n";
    print LOG "-- output created: $segdup_file\n";
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2670 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2671 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2672 #convert the links file to the bedPE format for BEDTools usage
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Convert a tab-separated links file into a BEDPE file.
#   $sample     : sample name; each output name is "<sample>_link<N>", N
#                 counting the input lines from 1
#   $links_file : input path; the first six columns are
#                 chr1 start1 end1 chr2 start2 end2
#   $bedpe_file : output path
# For every input line the output holds: the six coordinates with both starts
# decremented by one, the link name, a type ("INTRA" when chr1 eq chr2, else
# "INTER", followed by "_" and the 17th input column), two "." placeholders,
# and all remaining input columns joined with "|".
# Dies when a file cannot be opened or the output cannot be closed.
sub links2bedPElinksfile{

    my ($sample, $links_file, $bedpe_file) = @_;

    # Three-argument open with lexical handles: the file names are taken
    # literally (no mode characters or whitespace interpreted) and no
    # package-wide bareword handle is clobbered.
    open(my $links_fh, '<', $links_file)
        or die "$0: can't open $links_file :$!\n";
    open(my $bedpe_fh, '>', $bedpe_file)
        or die "$0: can't write in the output: $bedpe_file :$!\n";

    my $nb_links = 1;

    while (my $line = <$links_fh>) {    # named variable: caller's $_ is left alone

        chomp $line;
        my @t = split("\t", $line);
        my ($chr1, $start1, $end1, $chr2, $start2, $end2) = splice(@t, 0, 6);
        my $type = ($chr1 eq $chr2) ? "INTRA" : "INTER";
        $type .= "_".$t[10];            # index 10 after the splice = 17th input column

        $start1--; $start2--;

        print {$bedpe_fh} "$chr1\t$start1\t$end1\t$chr2\t$start2\t$end2".
                          "\t$sample"."_link$nb_links\t$type\t.\t.".
                          "\t".join("|", @t)."\n";

        $nb_links++;
    }

    close $links_fh;
    # Buffered write errors only surface when the handle is closed.
    close $bedpe_fh or die "$0: can't close $bedpe_file :$!\n";

}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2703 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2704 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Convert a BEDPE file back into a tab-separated links file.
#   $bedpe_file : input path; column 7 is the link name, column 11 holds the
#                 extra link columns joined with "|"
#   $links_file : output path
# Every output line holds the six coordinate columns unchanged, the column-11
# values split on "|", and finally the sample name.
# NOTE(review): the sample name is the part of column 7 before its first "_",
# so a sample name that itself contains "_" comes back truncated -- confirm
# that callers never use such names.
# Dies when a file cannot be opened or the output cannot be closed.
sub bedPElinks2linksfile{

    my ($bedpe_file, $links_file) = @_;

    # Three-argument open with lexical handles: the file names are taken
    # literally (no mode characters or whitespace interpreted) and no
    # package-wide bareword handle is clobbered.
    open(my $bedpe_fh, '<', $bedpe_file)
        or die "$0: can't open: $bedpe_file :$!\n";
    open(my $links_fh, '>', $links_file)
        or die "$0: can't write in the output $links_file :$!\n";

    while (my $line = <$bedpe_fh>) {    # named variable: caller's $_ is left alone

        chomp $line;
        my @fields = split("\t", $line);    # split once, reuse for every column

        my $sample = (split("_", $fields[6]))[0];
        my @t1 = @fields[0 .. 5];
        my @t2 = split(/\|/, $fields[10]);
        push(@t2, $sample);

        print {$links_fh} join("\t", @t1)."\t".join("\t", @t2)."\n";

    }
    close $bedpe_fh;
    # Buffered write errors only surface when the handle is closed.
    close $links_fh or die "$0: can't close $links_file :$!\n";

}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2727 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2728 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2729 #convert the links file to the bed format
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2730 sub links2bedfile{
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2731
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2732 my ($tag_length,$color_code,$links_file,$bed_file)=@_;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2733
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2734 print LOG "# Converting to the bed format...\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2735
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2736 my $compare=1;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2737 if($links_file!~/compared$/){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2738 $compare=0;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2739 $tag_length->{none}->{1}=$tag_length->{1};
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2740 $tag_length->{none}->{2}=$tag_length->{2};
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2741 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2742
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2743 #color-code hash table
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2744 tie (my %hcolor,'Tie::IxHash');
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2745 my %color_order;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2746 my $n=1;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2747 foreach my $col (keys %{$color_code}){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2748 my ($min_links,$max_links)=split(",",$color_code->{$col});
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2749 $hcolor{$col}=[$min_links,$max_links];
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2750 $color_order{$col}=$n;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2751 $n++;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2752 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2753
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2754 my %pair;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2755 my %pt;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2756 $n=1;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2757 open LINKS, "<$links_file" or die "$0: can't open $links_file:$!\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2758
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2759 my %str=( "F"=>"+", "R"=>"-" );
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2760
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2761 while(<LINKS>){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2762
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2763 my @t=split;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2764 my $sample=($compare)? pop(@t):"none";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2765
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2766 my $chr1=$t[0];
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2767 my $chr2=$t[3];
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2768 $chr1 = "chr".$chr1 unless ($chr1 =~ m/chr/i);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2769 $chr2 = "chr".$chr2 unless ($chr2 =~ m/chr/i);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2770 my $same_chr=($chr1 eq $chr2)? 1:0;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2771
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2772 my $count=$t[6];
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2773 my $color=getColor($count,\%hcolor,"bed");
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2774
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2775 my @pairs=deleteBadOrderSensePairs(split(",",$t[7]));
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2776 my @strand1=deleteBadOrderSensePairs(split(",",$t[8]));
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2777 my @strand2=deleteBadOrderSensePairs(split(",",$t[9]));
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2778 my @ends_order1=deleteBadOrderSensePairs(split(",",$t[10]));
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2779 my @ends_order2=deleteBadOrderSensePairs(split(",",$t[11]));
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2780 my @position1=deleteBadOrderSensePairs(split(",",$t[14]));
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2781 my @position2=deleteBadOrderSensePairs(split(",",$t[15]));
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2782 my @start1; my @end1; getCoordswithLeftMost(\@start1,\@end1,\@position1,\@strand1,\@ends_order1,$tag_length->{$sample});
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2783 my @start2; my @end2; getCoordswithLeftMost(\@start2,\@end2,\@position2,\@strand2,\@ends_order2,$tag_length->{$sample});
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2784
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2785
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2786 for my $p (0..$#pairs){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2787
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2788 if (!exists $pair{$pairs[$p]}){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2789
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2790 if($same_chr){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2791
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2792 $pair{$pairs[$p]}->{0}=[ $chr1, $start1[$p]-1, $end2[$p], $pairs[$p], 0, $str{$strand1[$p]},
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2793 $start1[$p]-1, $end2[$p], $color,
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2794 2, $tag_length->{$sample}->{$ends_order1[$p]}.",".$tag_length->{$sample}->{$ends_order2[$p]}, "0,".($start2[$p]-$start1[$p]) ];
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2795 $pt{$n}=$pair{$pairs[$p]}->{0};
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2796 $n++;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2797
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2798 }else{
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2799
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2800 $pair{$pairs[$p]}->{1}=[ $chr1, $start1[$p]-1, $end1[$p] , $pairs[$p]."/1", 0, $str{$strand1[$p]},
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2801 $start1[$p]-1, $end1[$p], $color,
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2802 1, $tag_length->{$sample}->{$ends_order1[$p]}, 0];
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2803 $pt{$n}=$pair{$pairs[$p]}->{1};
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2804 $n++;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2805
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2806
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2807 $pair{$pairs[$p]}->{2}=[ $chr2, $start2[$p]-1, $end2[$p], $pairs[$p]."/2", 0, $str{$strand2[$p]},
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2808 $start2[$p]-1, $end2[$p], $color,
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2809 1, $tag_length->{$sample}->{$ends_order2[$p]}, 0];
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2810 $pt{$n}=$pair{$pairs[$p]}->{2};
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2811 $n++;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2812 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2813 }else{
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2814
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2815 if($same_chr){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2816 ${$pair{$pairs[$p]}->{0}}[8]=$color if($color_order{$color}>$color_order{${$pair{$pairs[$p]}->{0}}[8]});
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2817 }else{
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2818 ${$pair{$pairs[$p]}->{1}}[8]=$color if($color_order{$color}>$color_order{${$pair{$pairs[$p]}->{1}}[8]});
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2819 ${$pair{$pairs[$p]}->{2}}[8]=$color if($color_order{$color}>$color_order{${$pair{$pairs[$p]}->{2}}[8]});
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2820 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2821 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2822 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2823 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2824 close LINKS;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2825
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2826 my $nb_pairs=$n-1;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2827
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2828 open BED, ">$bed_file" or die "$0: can't write in the output: $bed_file :$!\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2829 print BED "track name=\"$bed_file\" description=\"mate pairs involved in links\" ".
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2830 "visibility=2 itemRgb=\"On\"\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2831
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2832 for my $i (1..$nb_pairs){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2833 print BED join("\t",@{$pt{$i}})."\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2834 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2835
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2836 close BED;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2837
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2838 print LOG "-- output created: $bed_file\n";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2839
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2840 undef %pair;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2841 undef %pt;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2842
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2843 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2844 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2845 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Cleans a list of entries and drops the flagged ones.
# Parentheses are stripped from every entry; an entry whose cleaned text ends
# with '$', '*' or '@' is discarded (the sub name suggests these characters
# flag pairs with a bad order or sense - confirm against the links writer).
# Arguments : the entries, as a flat list (the caller's values are not modified)
# Returns   : the cleaned entries that were kept, in their input order
sub deleteBadOrderSensePairs{

    my @kept;

    for my $entry (@_){

        my $clean=$entry;               #copy first: @_ aliases the caller's values
        $clean=~s/[\(\)]//g;
        next if($clean=~/[\$\*\@]$/);   #flagged entry, skipped
        push(@kept,$clean);
    }
    return @kept;
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2858 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2859 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Splits a comma-separated field into its entries, without the parentheses.
# Arguments : $_[0] - the comma-separated string
# Returns   : the list of entries with every '(' and ')' removed; unlike
#             deleteBadOrderSensePairs, no entry is discarded
sub getAllEntries{

    my @entries=split(/,/,$_[0]);

    s/[\(\)]//g for @entries;   #in-place clean-up of each entry

    return @entries;
}
#------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2871 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Splits a comma-separated field into its entries and removes the parentheses
# as well as the special characters '$', '*' and '@' from each of them.
# Arguments : $_[0] - the comma-separated string
# Returns   : the list of cleaned entries (all entries are kept)
sub getAllEntriesWOspecialChar{

    my @entries=split(/,/,$_[0]);

    foreach my $i (0..$#entries){
        $entries[$i]=~s/[\(\)\$\*\@]//g;
    }
    return @entries;
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2883 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
2884 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Converts a links file into the tab-delimited SV output table.
#
# Arguments :
#   $links_file - links file to read (one link per line, whitespace-separated)
#   $sv_file    - table to write; "$sv_file.sorted" is used as a temporary
#                 file and is removed at the end
#
# Output columns are those listed in @header. The columns found after the
# 16th one in the links file depend on the filters that were applied, so the
# layout is recognised from the content of columns 17 to 20 (indexes 16-19).
# The rows are sorted with the external sort(1) command by decreasing number
# of pairs (column 9) then decreasing final score (column 13), and the header
# line is added on top of the sorted rows.
#
# Dies if a file cannot be opened or written, or if sort fails.
# Progress messages are printed to the LOG filehandle.
sub links2SVfile{

    my($links_file,$sv_file)=@_;
    my $sorted_file=$sv_file.".sorted";

    print LOG "# Converting to the sv output table...\n";
    open(my $links_fh,"<",$links_file) or die "$0: can't open $links_file:$!\n";
    open(my $sv_fh,">",$sv_file) or die "$0: can't write in the output: $sv_file :$!\n";

    my @header=qw(chr_type SV_type BAL_type chromosome1 start1-end1 average_dist
    chromosome2 start2-end2 nb_pairs score_strand_filtering score_order_filtering score_insert_size_filtering
    final_score breakpoint1_start1-end1 breakpoint2_start2-end2);

    while (<$links_fh>){

        my @t=split;
        my @sv=();

        #"-" is printed for every value missing from the links file
        my $sv_type="-";
        my $strand_ratio="-";
        my $eq_ratio="-";
        my $eq_type="-";
        my $insert_ratio="-";
        my $link="-";
        my ($bk1, $bk2)=("-","-");
        my $score="-";

        my ($chr1,$start1,$end1)=($t[0],$t[1],$t[2]);
        my ($chr2,$start2,$end2)=($t[3],$t[4],$t[5]);
        my $nb_pairs=$t[6];
        $chr1 = "chr".$chr1 unless ($chr1 =~ m/chr/i);
        $chr2 = "chr".$chr2 unless ($chr2 =~ m/chr/i);
        my $chr_type=($chr1 eq $chr2)? "INTRA":"INTER";

        #if strand filtering
        if (defined $t[16]){

            $sv_type=$t[16];

            #if inter-chr link: the strand ratio "n/m" directly follows the SV type
            if(defined $t[17] && $t[17]=~/^(\d+)\/(\d+)$/){
                $strand_ratio=floor($1/$2*100)."%";
                $score=$t[18];
            }

            #if intra-chr link with insert size filtering: a numeric value
            #(reported as average_dist) comes before the strand ratio
            if(defined $t[18] && $t[18]=~/^(\d+)\/(\d+)$/){
                $strand_ratio=floor($1/$2*100)."%";
                $link=floor($t[17]);
                if($sv_type!~/^INV/){
                    $insert_ratio=floor($1/$2*100)."%" if($t[19]=~/^(\d+)\/(\d+)$/);
                    $score=$t[20];
                }else{
                    #no insert size ratio column is read for the INV* types
                    $score=$t[19];
                }
            }
        }

        if(defined $t[18] && ($t[18] eq "UNBAL" || $t[18] eq "BAL")){

            #if strand and order filtering only and/or interchr link
            $eq_type=$t[18];
            $eq_ratio=floor($1/$2*100)."%" if($t[19]=~/^(\d+)\/(\d+)$/);
            ($bk1, $bk2)=($t[20],$t[21]);
            #"(a,b),(c,d)" is rewritten as "a-b c-d"
            foreach my $bk ($bk1, $bk2){$bk=~s/\),\(/ /g; $bk=~s/(\(|\))//g; $bk=~s/,/-/g;}
            $score=$t[22];

        }elsif(defined $t[19] && ($t[19] eq "UNBAL" || $t[19] eq "BAL")){

            #if all three filtering
            $link=floor($t[17]);
            $eq_type=$t[19];
            $eq_ratio=floor($1/$2*100)."%" if($t[20]=~/^(\d+)\/(\d+)$/);

            if(defined $t[21] && $t[21]=~/^(\d+)\/(\d+)$/){
                $insert_ratio=floor($1/$2*100)."%";
                ($bk1, $bk2)=($t[22],$t[23]);
                $score=$t[24];

            }else{
                ($bk1, $bk2)=($t[21],$t[22]);
                $score=$t[23];
            }
            #"(a,b),(c,d)" is rewritten as "a-b c-d"
            foreach my $bk ($bk1, $bk2){$bk=~s/\),\(/ /g; $bk=~s/(\(|\))//g; $bk=~s/,/-/g;}

        }

        push(@sv, $chr_type, $sv_type,$eq_type);
        push(@sv,"$chr1\t$start1-$end1");
        push(@sv, $link);
        push(@sv,"$chr2\t$start2-$end2",
             $nb_pairs,$strand_ratio,$eq_ratio,$insert_ratio, decimal($score,4), $bk1, $bk2);

        print $sv_fh join("\t",@sv)."\n";
    }

    close($links_fh);
    #buffered write errors only show up at close time
    close($sv_fh) or die "$0: can't write in the output: $sv_file :$!\n";

    #list form: no shell involved, so special characters in the file names are
    #harmless; the status is checked because a failed sort would otherwise
    #leave a table reduced to its header
    system("sort","-k","9,9nr","-k","13,13nr","-o",$sorted_file,$sv_file)==0
        or die "$0: sort failed on $sv_file (status $?)\n";

    open(my $sorted_fh,"<",$sorted_file) or die "$0: can't open in the output: $sv_file".".sorted :$!\n";
    my @links=<$sorted_fh>;
    close($sorted_fh);

    #the table is rewritten with the header on top of the sorted rows
    open(my $out_fh,">",$sv_file) or die "$0: can't write in the output: $sv_file :$!\n";
    print $out_fh join("\t",@header)."\n";
    print $out_fh @links;
    close($out_fh) or die "$0: can't write in the output: $sv_file :$!\n";

    unlink($sorted_file);

    print LOG "-- output created: $sv_file\n";

}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3000 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Computes the window densities of a mates file and of its reference
# counterpart, then writes their log-ratio for each window.
#
# Arguments :
#   $chr            - hash ref: {nb_chrs}, and for each chromosome index $k
#                     {$k}->{name}, {$k}->{nb_frag} and {$k}->{$frag}, an
#                     array ref whose two first values are written (+1) as
#                     the window coordinates
#   $chrID          - passed as is to FreqCalculation
#   $file           - index of the file to process in the two arrays below
#   $tag_length, $window_dist, $step, $input_format
#                   - passed as is to FreqCalculation
#   $mates_file     - array ref of the sample mates files
#   $mates_file_ref - array ref of the reference mates files
#   $density_file   - output path; its "/mates/" part is replaced by "/density/"
#
# Output line : name, start, end, count, reference count, log-ratio.
# Windows without any count in both samples are not written.
# Dies if the output file cannot be opened. Messages are printed to LOG.
sub densityCalculation{

    my ($chr,$chrID,$file,$tag_length,$window_dist,$step,$mates_file,$mates_file_ref,$density_file,$input_format)=@_;

    #label used in the log: last dot-separated part of the mates file name
    my @name_parts=split(/\./,$mates_file->[$file]);
    my $fchr=$name_parts[$#name_parts];

    my (%density,%density_ref);

    #FREQUENCY CALCULATION PROCEDURE
    print LOG "# $fchr : Frequency calculation procedure...\n";
    FreqCalculation(\%density,$chr,$chrID,$tag_length,$window_dist,$step,$mates_file->[$file],$input_format);
    FreqCalculation(\%density_ref,$chr,$chrID,$tag_length,$window_dist,$step,$mates_file_ref->[$file],$input_format);

    #MAKING RATIO AND OUTPUT
    print LOG "\# Ratio calculation procedure...\n";
    $density_file=~s/\/mates\//\/density\//;

    my $fh=FileHandle->new;
    $fh->open(">".$density_file) or die "$0: can't write in the output ".$density_file." :$!\n";

    for my $k (1..$chr->{nb_chrs}){
        for my $frag (1..$chr->{$k}->{nb_frag}){

            my @ratio=($chr->{$k}->{name},
                       (${$chr->{$k}->{$frag}}[0]+1),
                       (${$chr->{$k}->{$frag}}[1]+1));

            my $cov=(exists $density{$k}{$frag}->{count})? $density{$k}{$frag}->{count}:0;
            my $cov_ref=(exists $density_ref{$k}{$frag}->{count})? $density_ref{$k}{$frag}->{count}:0;

            #nothing to report when both samples are empty on this window
            next if(!$cov && !$cov_ref);

            my $log_ratio;
            if($cov && $cov_ref){
                $log_ratio=log($cov/$cov_ref);
            }elsif($cov_ref){
                #sample empty: +1 on the reference count, negative ratio
                $log_ratio=-log($cov_ref+1);
            }else{
                #reference empty: +1 on the sample count
                $log_ratio=log($cov+1);
            }

            push(@ratio,$cov,$cov_ref,$log_ratio);
            print $fh join("\t",@ratio)."\n";
        }
    }

    $fh->close;
    print LOG "-- output created: $density_file\n";
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3051 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3052 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Reads a mates file and records, window by window, the pairs overlapping
# each window of the chromosome of the first read.
#
# Arguments :
#   $density      - hash ref handed to addToDensity for each overlapping window
#   $chr          - hash ref: for a chromosome index, {nb_frag} is the number
#                   of windows and {$i} an array ref holding the window
#                   bounds (values [0] and [1] are given to overlap)
#   $chrID        - hash ref converting the chromosome names returned by
#                   readMateFile into the indexes used in $chr
#   $tag_length   - hash ref of the read lengths, keyed by the end order
#   $window_dist  - a window is tested only if its start is within this
#                   distance of the start of the first read
#   $step         - passed to getNextFrag to jump over the distant windows
#   $mates_file   - file to read; "*.gz" is read through gunzip and "*.bam"
#                   through samtools view
#   $input_format - passed as is to readMateFile and recupCoords
#
# A pair name (first column) is processed once only; lines rejected by
# readMateFile, or with none of the two chromosomes in $chrID, are skipped.
# Progress, totals, and the mean and standard deviation of the distance
# between the two read starts are printed to LOG.
# Dies if the mates file cannot be opened.
sub FreqCalculation{

    my ($density,$chr,$chrID,$tag_length,$window_dist,$step,$mates_file,$input_format) = @_;

    #label used in the log: last dot-separated part of the file name
    my @sfile=split(/\./,$mates_file);
    my $fchr=$sfile[$#sfile];
    my $fh = FileHandle->new;

    my $nb_windows=0;
    my $warn=100000;    #next number of pairs at which a progress line is logged
    my $record=0;
    my %pair;           #names of the pairs already processed

    my ($sumX,$sumX2) = (0,0);

    print LOG "\# Frequency calculation for $mates_file...\n";

    #NOTE(review): the file name is interpolated in a shell command for the
    #two piped cases below; it must not contain shell metacharacters
    if ($mates_file =~ /\.gz$/) {
        $fh->open("gunzip -c $mates_file |") or die "$0: can't open ".$mates_file.":$!\n";
    }elsif($mates_file =~ /\.bam$/){
        $fh->open("$SAMTOOLS_BIN_DIR/samtools view $mates_file |") or die "$0: can't open ".$mates_file.":$!\n";#GALAXY
    }else{
        $fh->open("<".$mates_file) or die "$0: can't open ".$mates_file.":$!\n";
    }

    while(<$fh>){

        my @t=split;
        my $mate=$t[0];

        my ($chr_read1, $chr_read2, $firstbase_read1, $firstbase_read2, $end_order_read1, $end_order_read2);

        next if(exists $pair{$mate});

        next if (!readMateFile(\$chr_read1, \$chr_read2, \$firstbase_read1, \$firstbase_read2,\$end_order_read1, \$end_order_read2, \@t, $input_format,$tag_length));

        next unless (exists $chrID->{$chr_read1} || exists $chrID->{$chr_read2});
        ($chr_read1, $chr_read2)= ($chrID->{$chr_read1},$chrID->{$chr_read2});

        $pair{$mate}=undef;
        $record++;

        my ($coord_start_read1,$coord_end_read1, $coord_start_read2,$coord_end_read2);

        recupCoords($firstbase_read1,\$coord_start_read1,\$coord_end_read1,$tag_length->{$end_order_read1},$input_format);
        recupCoords($firstbase_read2,\$coord_start_read2,\$coord_end_read2,$tag_length->{$end_order_read2},$input_format);

        my $length = abs($coord_start_read1-$coord_start_read2);
        $sumX += $length;                           #add to sum and sum^2 for mean and variance calculation
        $sumX2 += $length*$length;

        for(my $i=1;$i<=$chr->{$chr_read1}->{'nb_frag'};$i++){

            if (abs ($coord_start_read1-${$chr->{$chr_read1}->{$i}}[0]) <= $window_dist){

                #the whole span, from the start of read 1 to the end of read 2, is tested
                &addToDensity($density,$chr_read1,$i,\$nb_windows)
                if(overlap($coord_start_read1,$coord_end_read2,${$chr->{$chr_read1}->{$i}}[0],${$chr->{$chr_read1}->{$i}}[1]));

            }else{

                #window too far from the read: the loop index is moved by getNextFrag
                $i=getNextFrag($coord_start_read1,$i,${$chr->{$chr_read1}->{$i}}[0],$chr->{$chr_read1}->{nb_frag},$window_dist,$step);
            }
        }

        if($record>=$warn){
            print LOG "-- $warn mate-pairs analysed - $nb_windows points created\n";
            $warn+=100000;
        }
    }
    $fh->close;

    print LOG "-- $fchr : Total : $record mate-pairs analysed - $nb_windows points created\n";

    if($record>0){

        my $mu = $sumX/$record;
        my $variance = $sumX2/$record - $mu*$mu;
        #rounding errors can give a slightly negative value, on which sqrt would die
        $variance = 0 if($variance<0);
        my $sigma = sqrt($variance);
        print LOG "-- $fchr : mu length = $mu, sigma length = $sigma\n";
    }

}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3134 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3135 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Convert a density/ratio file into a circos data track.
#
# Arguments:
#   $id           - prefix glued (no separator) in front of each chromosome name
#   $density_file - tab-separated input; fields 0, 1, 2 and 5 of each line are
#                   used as chromosome, start, end and ratio
#   $segdup_file  - output file, overwritten if it exists
#
# Progress messages go to the global LOG filehandle.
# Dies if a file cannot be opened or if the output cannot be flushed on close.
sub ratio2segdup{

    my($id,$density_file,$segdup_file)=@_;

    print LOG "# Converting to circos format...\n";

    # Three-argument open with lexical handles: the file name is taken
    # literally (no whitespace stripping, no mode characters read from it)
    # and the handles cannot collide with other RATIO/SEGDUP users.
    open(my $ratio_fh, '<', $density_file)
        or die "$0: can't open $density_file :$!\n";
    open(my $segdup_fh, '>', $segdup_file)
        or die "$0: can't write in the output: $segdup_file :$!\n";

    # A named loop variable keeps the caller's $_ untouched.
    while(my $line=<$ratio_fh>){
        chomp $line;
        my ($chr1,$start1,$end1,$ratio)=(split /\t/, $line)[0,1,2,5];
        print {$segdup_fh} "$id$chr1\t$start1\t$end1\t$ratio\n";
    }

    close $ratio_fh;
    # Buffered write errors only surface at close time.
    close $segdup_fh
        or die "$0: can't write in the output: $segdup_file :$!\n";

    print LOG "-- output created: $segdup_file\n";
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3155 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3156 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Convert a density/ratio file into a UCSC bedGraph track.
#
# Arguments:
#   $density_file - tab-separated input; fields 0, 1, 2 and 5 of each line are
#                   used as chromosome, start, end and ratio
#   $bed_file     - output file, overwritten if it exists; its name is also
#                   used as the track name in the header line
#
# Chromosome names that do not contain "chr" get a "chr" prefix, and the start
# coordinate is written minus one.
# Progress messages go to the global LOG filehandle.
# Dies if a file cannot be opened or if the output cannot be flushed on close.
sub ratio2bedfile{

    my($density_file,$bed_file)=@_;

    print LOG "# Converting to bedGraph format...\n";

    # Three-argument open with lexical handles: the file name is taken
    # literally and the handles cannot collide with other RATIO/BED users.
    open(my $ratio_fh, '<', $density_file)
        or die "$0: can't open $density_file :$!\n";
    open(my $bed_fh, '>', $bed_file)
        or die "$0: can't write in the output: $bed_file :$!\n";

    print {$bed_fh} "track type=bedGraph name=\"$bed_file\" description=\"log ratios for cnv detection\" ".
        "visibility=2 color=255,0,0 alwaysZero=\"On\"\n";

    # A named loop variable keeps the caller's $_ untouched.
    while(my $line=<$ratio_fh>){
        chomp $line;
        my ($chr1,$start1,$end1,$ratio)=(split /\t/, $line)[0,1,2,5];
        $chr1 = "chr".$chr1 unless ($chr1 =~ m/chr/);
        print {$bed_fh} "$chr1\t".($start1-1)."\t$end1\t$ratio\n";
    }

    close $ratio_fh;
    # Buffered write errors only surface at close time.
    close $bed_fh
        or die "$0: can't write in the output: $bed_file :$!\n";

    print LOG "-- output created: $bed_file\n";
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3179 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3180 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Return the opposite orientation of a read ('F'/'R') or of a pair
# ('FF', 'RR', 'FR', 'RF'): every F becomes R and every R becomes F.
# Any other value yields undef.
sub inverseSense{

    my ($sense) = @_;

    # Only the six one- or two-letter F/R codes have an inverse.
    # "return undef" (not a bare return) so list-context callers still
    # receive exactly one element.
    return undef unless $sense =~ /\A[FR]{1,2}\z/;

    (my $flipped = $sense) =~ tr/FR/RF/;
    return $flipped;
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3189
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3190 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3191 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Compute the index of the next fragment (window) to examine for a read.
#
# Arguments:
#   $read_start  - start coordinate of the read
#   $frag_num    - index of the current fragment
#   $frag_start  - start coordinate of the current fragment
#   $frag_last   - value returned when the read does not lie after the
#                  fragment start (the visible caller passes the {nb_frag}
#                  entry of the chromosome here)
#   $window_dist - distance subtracted (in units of $step) from the jump
#   $step        - distance between two consecutive fragment starts
#
# Returns $frag_num plus the whole number of windows that can be skipped
# (never negative) when the read starts after the fragment, else $frag_last.
sub getNextFrag{

    my ($read_start,$frag_num,$frag_start,$frag_last,$window_dist,$step)=@_;

    my $how_far = $read_start-$frag_start;

    if($how_far>0){

        # Truncate numerically. Cutting the decimal part off the stringified
        # number is wrong when Perl prints it in exponent notation
        # (e.g. 1.5e-05 would become 1 and skip a window by mistake).
        my $nb_windows_toskip=int(($how_far/$step)-($window_dist/$step));
        $nb_windows_toskip=0 if($nb_windows_toskip<0);
        return ($frag_num + $nb_windows_toskip);
    }
    return $frag_last;
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3208 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3209 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Look up the colour whose [min, max] range contains $count.
#
# Arguments:
#   $count  - value to classify
#   $hcolor - hash ref: colour name => array ref whose elements 0 and 1 are
#             the inclusive lower and upper bounds
#   $format - "circos" or "bed"; selects the fallback colour
#
# Returns the first matching colour name in hash iteration order, otherwise
# "white" for circos or "255,255,255" for bed.
sub getColor{

    my ($count, $hcolor, $format) = @_;

    foreach my $name (keys %{$hcolor}) {
        my $bounds = $hcolor->{$name};
        next unless $count >= $bounds->[0];
        next unless $count <= $bounds->[1];
        return $name;
    }

    # No range matched: fall back to white in the requested notation.
    if ($format eq "circos") {
        return "white";
    }
    return "255,255,255" if($format eq "bed");
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3219 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3220 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Derive the strand and the start/end coordinates of a hit.
#
# Arguments:
#   $c_hit        - hit position; a leading "-" marks a reverse-strand hit
#   $cs_hit       - scalar ref, receives the start (position without sign)
#   $ce_hit       - scalar ref, receives the end: start - ($tag_length-1) on
#                   the reverse strand, start + ($tag_length-1) otherwise
#   $tag_length   - length of the read
#   $input_format - accepted but not used here
#
# Returns 'R' for a reverse-strand hit and 'F' otherwise.
sub recupCoords{

    my ($c_hit, $cs_hit, $ce_hit, $tag_length, $input_format) = @_;

    # The substitution both detects and removes the strand sign.
    my $is_reverse = ($c_hit =~ s/^\-//);
    my $span       = $tag_length - 1;

    ${$cs_hit} = $c_hit;
    ${$ce_hit} = $is_reverse ? $c_hit - $span : $c_hit + $span;

    return $is_reverse ? 'R' : 'F';
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3237 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3238 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Tell whether a hit [$cs_hit, $ce_hit] touches a region
# [$cs_region, $ce_region].
# Returns 0 when both hit coordinates lie strictly before the region start or
# strictly after the region end, 1 in every other case.
sub overlap {

    my ($cs_hit, $ce_hit, $cs_region, $ce_region) = @_;

    my $entirely_before = ($cs_hit < $cs_region) && ($ce_hit < $cs_region);
    return 0 if $entirely_before;

    my $entirely_after = ($cs_hit > $ce_region) && ($ce_hit > $ce_region);
    return 0 if $entirely_after;

    return 1;
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3246 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3247 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Record that mate-pair $mt connects fragment $frag1 of $chr1 with fragment
# $frag2 of $chr2.
#
# Arguments:
#   $link         - hash ref filled as
#                   $link->{chrA}{chrB}{fragA}{fragB} = "mt1,mt2,..."
#   $chr1, $frag1 - chromosome and fragment of the first end
#   $chr2, $frag2 - chromosome and fragment of the second end
#   $mt           - mate-pair identifier to store
#   $nb           - scalar ref to a counter, incremented when a new
#                   (chrA,chrB,fragA,fragB) entry is created
#
# The two ends are ordered so that chrA <= chrB and, on the same chromosome,
# fragA <= fragB. Chromosomes and fragments are compared numerically
# (presumably they are numeric ids - confirm against the callers).
sub makeLink {

    my ($link,$chr1,$frag1,$chr2,$frag2,$mt,$nb)=@_;

    # Canonical order: smaller chromosome first, its fragment follows it.
    if($chr1>$chr2){
        ($chr1,$chr2)= ($chr2,$chr1);
        ($frag1,$frag2)= ($frag2,$frag1);
    }

    # Same chromosome: smaller fragment first.
    if($chr1 == $chr2){
        if($frag1>$frag2){
            ($frag1,$frag2)= ($frag2,$frag1);
        }
    }

    if(!exists $link->{$chr1}->{$chr2}->{$frag1}->{$frag2}){
        $link->{$chr1}->{$chr2}->{$frag1}->{$frag2}=$mt;
        $$nb++;
    }elsif($link->{$chr1}->{$chr2}->{$frag1}->{$frag2}!~/(?:^|,)\Q$mt\E(?:,|$)/){
        # \Q...\E: the id is matched literally, so characters such as "."
        # or "+" in it cannot make a different id look already present.
        $link->{$chr1}->{$chr2}->{$frag1}->{$frag2}.=",$mt";
    }
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3270 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3271 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3272 #function adding the read to the density profile
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Count one more read for window $frag1 of chromosome $chr1.
#
# Arguments:
#   $density - hash ref; the counter lives in $density->{chr}{frag}{count}
#   $chr1    - chromosome key
#   $frag1   - window (fragment) key
#   $nb      - scalar ref to the number of distinct windows seen so far,
#              incremented the first time a window receives a read
sub addToDensity {

    my ($profile, $chrom, $window, $created) = @_;

    if (exists $profile->{$chrom}->{$window}->{count}) {
        # Window already known: just add the read.
        $profile->{$chrom}->{$window}->{count}++;
    }
    else {
        # First read in this window: start the counter and count the window.
        $profile->{$chrom}->{$window}->{count} = 1;
        ${$created}++;
    }
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3284 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3285 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Drop the fractional part of a number.
#
# Despite the name this truncates towards zero (floor(-2.5) gives -2), exactly
# like the text-based version it replaces; callers may rely on that.
# Returns the argument with everything from the first "." removed.
sub floor {
    my $nb = $_[0];

    # Numbers that stringify in exponent notation (1.5e-05, 1e+20, ...)
    # cannot be truncated by cutting at the dot: 1.5e-05 would become 1.
    # Truncate those numerically instead.
    return int($nb) if $nb =~ /^[+-]?(?:\d+\.?\d*|\.\d+)[eE][+-]?\d+$/;

    $nb=~ s/\..*//;
    return $nb;
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3291 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Shorten the decimal part of a number.
#
# Arguments (positional): the number, then $digs_to_cut.
# When the number has at least $digs_to_cut digits after the decimal point it
# is reformatted with ($digs_to_cut - 1) decimals; otherwise it is returned
# unchanged.
sub decimal{

    my $value       = shift;
    my $digs_to_cut = shift;

    # Too few decimals: nothing to cut.
    return $value unless ($value=~/\d+\.(\d){$digs_to_cut,}/);

    my $kept_digits = $digs_to_cut - 1;
    return sprintf("%.${kept_digits}f", $value);
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3301
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3302 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Return the numerically largest argument (undef for an empty list).
sub max {

    my $largest = shift(@_);

    for my $candidate (@_) {
        next unless $candidate > $largest;
        $largest = $candidate;
    }

    return $largest;
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3311 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3312 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Return the numerically smallest argument (undef for an empty list).
sub min {

    my $smallest = shift(@_);

    for my $candidate (@_) {
        next unless $candidate < $smallest;
        $smallest = $candidate;
    }

    return $smallest;
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3321 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3322 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Reorder the elements of @$tab2 according to the index table @$tab1.
#
# Each VALUE $p found in @$tab1 is used as a position: the result gets
# $tab2->[ $tab1->[$p] ] at position $p.
# NOTE(review): this only visits every position once if @$tab1 is a
# permutation of 0..$#$tab1 - presumably always the case; confirm with callers.
#
# Returns the reordered list.
sub sortTablebyIndex{

    my ($order, $values) = @_;
    my @reordered;

    for my $position (@{$order}) {
        my $source_index = $order->[$position];
        $reordered[$position] = $values->[$source_index];
    }

    return @reordered;
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3332 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3333 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Round a number to a given number of decimals, halves away from zero.
#
# Arguments (positional): the number, then the number of decimals.
# A missing or false value for either one is treated as 0.
sub round {

    my $value  = shift || 0;
    my $places = shift || 0;

    my $scale = 10 ** $places;

    # +0.5 for positive values, -0.5 for negative ones, 0 for zero, so that
    # int() (which truncates towards zero) rounds away from zero.
    my $half = .5 * ($value <=> 0);

    return int( $scale * $value + $half ) / $scale;
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3339 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3340 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Return the distinct values of the argument list, sorted with Perl's default
# (string) ordering.
sub getUniqueTable{

    my %seen;

    # Keep only the first occurrence of each value.
    my @distinct = grep { !$seen{$_}++ } @_;

    return sort @distinct;
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3348 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3349 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
# Concatenate several files into one.
#
# Arguments:
#   $_[0] - array ref of input file names, appended in the given order
#   $_[1] - output file name; an existing file of that name is removed first
#
# The copy is done in Perl rather than through a shell "cat" command, so file
# names containing quotes, "$", backticks or other shell metacharacters are
# handled literally and no external program is needed.
# Failures are reported with warn and do not stop the program, as before
# (a missing input is skipped).
sub catFiles {

    my ($inputs, $output) = @_;

    unlink($output) if (@_ > 1 && defined $output);

    my @sources = @{ $inputs || [] };
    return unless @sources;

    unless (defined $output && length $output) {
        warn "$0: catFiles: no output file given\n";
        return;
    }

    my $out;
    unless (open($out, '>>', $output)) {
        warn "$0: can't write in the output: $output :$!\n";
        return;
    }
    binmode $out;

    foreach my $source (@sources) {
        my $in;
        unless (open($in, '<', $source)) {
            warn "$0: can't open $source :$!\n";
            next;
        }
        binmode $in;

        # Copy in fixed-size chunks so large files are never held in memory.
        my $buffer;
        while (read($in, $buffer, 65536)) {
            print {$out} $buffer;
        }
        close $in;
    }

    # Buffered write errors only surface at close time.
    close $out or warn "$0: can't write in the output: $output :$!\n";
    return;
}
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3355 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3356 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3357 #check if the configuration file is correct
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3358 sub validateconfiguration{
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3359
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3360 my %conf=%{$_[0]};
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3361 my $list_prgs="@ARGV";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3362
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3363 my @general_params=qw(input_format mates_orientation read1_length read2_length mates_file cmap_file);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3364 my @detection_params=qw(split_mate_file window_size step_length split_mate_file);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3365 my @filtering_params=qw(split_link_file nb_pairs_threshold strand_filtering split_link_file);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3366 my @circos_params=qw(organism_id colorcode);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3367 my @bed_params=qw(colorcode);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3368 my @compare_params=qw(list_samples file_suffix);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3369
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3370 foreach my $dir ($conf{general}{output_dir},$conf{general}{tmp_dir}){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3371
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3372 unless (defined($dir)) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3373 $dir = ".";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3374 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3375 unless (-d $dir){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3376 mkdir $dir or die;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3377 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3378 $dir.="/" if($dir!~/\/$/);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3379 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3380
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3381 unless (defined($conf{general}{num_threads})) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3382 $conf{general}{num_threads} = 1;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3383 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3384 $conf{general}{num_threads}=24 if($conf{general}{num_threads}>24);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3385
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3386 if($list_prgs!~/links2compare/){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3387
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3388 foreach my $p (@general_params){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3389 die("Error Config : The parameter \"$p\" is not defined\n") if (!defined $conf{general}{$p});
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3390 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3391
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3392 $conf{general}{input_format}="sam" if($conf{general}{input_format} eq "bam");
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3393
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3394 unless (defined($conf{general}{sv_type})) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3395 $conf{general}{sv_type} = "all";
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3396 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3397
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3398 $conf{general}{read_lengths}={ 1=> $conf{general}{read1_length}, 2=> $conf{general}{read2_length}};
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3399 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3400
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3401 if($list_prgs=~/(linking|cnv)/){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3402
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3403 foreach my $p (@detection_params){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3404 die("Error Config : The parameter \"$p\" is not defined\n") if (!defined $conf{detection}{$p});
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3405 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3406
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3407 die("Error Config : The parameter \"mates_file_ref\" is not defined\n") if($list_prgs=~/cnv/ && !defined $conf{detection}{mates_file_ref});
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3408
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3409 if($conf{detection}{step_length}>$conf{detection}{window_size}){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3410 die("Error Config : Parameter \"step_length\" should not exceed \"window size\"\n");
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3411 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3412
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3413 unless (-d $conf{general}{tmp_dir}."/mates"){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3414 mkdir $conf{general}{tmp_dir}."/mates" or die;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3415 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3416
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3417 if($list_prgs=~/linking/){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3418 unless (-d $conf{general}{tmp_dir}."/links"){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3419 mkdir $conf{general}{tmp_dir}."/links" or die;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3420 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3421 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3422 if($list_prgs=~/cnv/){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3423 unless (-d $conf{general}{tmp_dir}."/density"){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3424 mkdir $conf{general}{tmp_dir}."/density" or die;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3425 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3426 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3427
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3428 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3429
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3430 if($list_prgs=~/filtering/){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3431
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3432 foreach my $p (@filtering_params) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3433 die("Error Config : The filtering parameter \"$p\" is not defined\n") if (!defined $conf{filtering}{$p});
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3434
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3435 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3436
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3437 if(defined($conf{filtering}{chromosomes})) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3438 my @chrs=split(",",$conf{filtering}{chromosomes});
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3439 my $exclude=($chrs[0]=~/^\-/)? 1:0;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3440 for my $chrName (@chrs){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3441
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3442 die("Error Config : The filtering parameter \"chromosomes\" is not valid\n")
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3443 if(($chrName!~/^\-/ && $exclude) || ($chrName=~/^\-/ && !$exclude));
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3444
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3445 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3446 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3447
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3448 if (( $conf{filtering}{order_filtering} )&& !$conf{filtering}{strand_filtering}) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3449 die("Error Config : The parameter strand_filtering is set to \"0\" while order_filtering is selected".
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3450 "\nChange strand_filtering to \"1\" if you want to use the order filtering\n");
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3451 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3452 if (( !defined($conf{filtering}{mu_length}) || !defined($conf{filtering}{sigma_length}) )&& $conf{filtering}{order_filtering}) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3453 die("Error Config : You should set parameters \"mu_length\" and \"sigma_length\" to use order filtering\n");
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3454 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3455 if (( $conf{filtering}{insert_size_filtering} )&& !$conf{filtering}{strand_filtering}) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3456 die("Error Config : The parameter strand_filtering is set to \"0\" while insert_size_filtering is selected".
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3457 "\nChange strand_filtering to \"1\" if you want to use the insert size filtering\n");
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3458 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3459 if (( !defined($conf{filtering}{mu_length}) || !defined($conf{filtering}{sigma_length}) )&& $conf{filtering}{insert_size_filtering}) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3460 die("Error Config : You should set parameters \"mu_length\" and \"sigma_length\" to use discriminate insertions from deletions\n");
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3461 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3462
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3463 if (!defined($conf{filtering}{indel_sigma_threshold})) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3464 $conf{filtering}{indel_sigma_threshold} = 2;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3465 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3466 if (!defined($conf{filtering}{dup_sigma_threshold})) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3467 $conf{filtering}{dup_sigma_threshold} = 2;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3468 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3469 if (!defined($conf{filtering}{singleton_sigma_threshold})) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3470 $conf{filtering}{singleton_sigma_threshold} = 4;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3471 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3472
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3473 if (!defined($conf{filtering}{nb_pairs_order_threshold})) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3474 $conf{filtering}{nb_pairs_order_threshold} = 1;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3475 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3476
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3477 if (!defined($conf{filtering}{final_score_threshold})) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3478 $conf{filtering}{final_score_threshold} = 0.8;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3479 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3480
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3481 if ($conf{filtering}{nb_pairs_order_threshold}>$conf{filtering}{nb_pairs_threshold}) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3482 die("Error Config : Parameter \"nb_pairs_order_threshold\" should not exceed \"nb_pairs_threshold\"\n");
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3483 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3484
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3485 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3486
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3487 if($list_prgs=~/2circos$/){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3488 foreach my $p (@circos_params) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3489 next if($list_prgs=~/^ratio/ && $p eq "colorcode");
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3490 die("Error Config : The circos parameter \"$p\" is not defined\n") if (!defined $conf{circos}{$p});
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3491 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3492 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3493
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3494 if($list_prgs=~/2bed$/){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3495 foreach my $p (@bed_params) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3496 die("Error Config : The bed parameter \"$p\" is not defined\n") if (!defined $conf{bed}{$p});
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3497 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3498 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3499
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3500 if($list_prgs=~/links2compare/){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3501 foreach my $p (@compare_params) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3502 die("Error Config : The compare parameter \"$p\" is not defined\n") if (!defined $conf{compare}{$p});
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3503 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3504
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3505 unless (defined($conf{compare}{same_sv_type})) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3506 $conf{compare}{same_sv_type} = 0;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3507 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3508
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3509 unless (defined($conf{compare}{min_overlap})) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3510 $conf{compare}{min_overlap} = 1E-9;
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3511 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3512
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3513 if($conf{compare}{circos_output}){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3514 foreach my $p (@circos_params) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3515 next if($list_prgs=~/^ratio/ && $p eq "colorcode");
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3516 die("Error Config : The circos parameter \"$p\" is not defined\n") if (!defined $conf{circos}{$p});
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3517 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3518 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3519 if($conf{compare}{bed_output}){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3520 foreach my $p (@bed_params) {
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3521 die("Error Config : The bed parameter \"$p\" is not defined\n") if (!defined $conf{bed}{$p});
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3522 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3523 die("Error Config : The compare parameter \"list_read_lengths\" is not defined\n") if (!defined $conf{compare}{list_read_lengths});
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3524
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3525 my @samples=split(",",$conf{compare}{list_samples});
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3526 my @read_lengths=split(",",$conf{compare}{list_read_lengths});
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3527 for my $i (0..$#samples){
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3528 my @l=split("-",$read_lengths[$i]);
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3529 $conf{compare}{read_lengths}{$samples[$i]}={ 1=> $l[0], 2=> $l[1]};
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3530 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3531 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3532 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3533
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3534
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3535 }
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3536 #------------------------------------------------------------------------------#
ba8c5e544948 Uploaded
bzeitouni
parents:
diff changeset
3537 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#