| 
3
 | 
     1 #!/usr/bin/perl -w
 | 
| 
 | 
     2 
 | 
| 
 | 
     3 =pod
 | 
| 
 | 
     4 
 | 
| 
 | 
     5 =head1 NAME
 | 
| 
 | 
     6 
 | 
| 
 | 
     7 SVDetect - Program designed to the detection of structural variations
 | 
| 
 | 
     8 from paired-end/mate-pair sequencing data, compatible with SOLiD and Illumina (>=1.3) reads
 | 
| 
 | 
     9 
 | 
| 
 | 
    10 Version: 0.8b for Galaxy
 | 
| 
 | 
    11 
 | 
| 
 | 
    12 =head1 SYNOPSIS
 | 
| 
 | 
    13 
 | 
| 
 | 
    14 SVDetect <command> -conf <configuration_file> [-help] [-man]
 | 
| 
 | 
    15     
 | 
| 
 | 
    16     Command:
 | 
| 
 | 
    17 
 | 
| 
 | 
    18     	linking		detection and isolation of links
 | 
| 
 | 
    19         filtering	filtering of links according different parameters
 | 
| 
 | 
    20         links2circos	links conversion to circos format
 | 
| 
 | 
    21 	links2bed 	paired-ends of links converted to bed format (UCSC)
 | 
| 
 | 
    22 	links2SV	formatted output to show most significant SVs
 | 
| 
 | 
    23 	cnv		calculate copy-number profiles
 | 
| 
 | 
    24 	ratio2circos	ratio conversion to circos density format
 | 
| 
 | 
    25 	ratio2bedgraph	ratio conversion to bedGraph density format (UCSC)
 | 
| 
 | 
    26     
 | 
| 
 | 
    27 =head1 DESCRIPTION
 | 
| 
 | 
    28 
 | 
| 
 | 
    29 This is a command-line interface to SVDetect.
 | 
| 
 | 
    30 
 | 
| 
 | 
    31 
 | 
| 
 | 
    32 =head1 AUTHORS
 | 
| 
 | 
    33 
 | 
| 
 | 
    34 Bruno Zeitouni E<lt>bruno.zeitouni@curie.frE<gt>,
 | 
| 
 | 
    35 Valentina Boeva E<lt>valentina.boeva@curie.frE<gt>
 | 
| 
 | 
    36 
 | 
| 
 | 
    37 =cut
 | 
| 
 | 
    38 
 | 
| 
 | 
    39 # -------------------------------------------------------------------
 | 
| 
 | 
    40 
 | 
| 
 | 
    41 use strict;
 | 
| 
 | 
    42 use warnings;
 | 
| 
 | 
    43 
 | 
| 
 | 
    44 use Pod::Usage;
 | 
| 
 | 
    45 use Getopt::Long;
 | 
| 
 | 
    46 
 | 
| 
 | 
    47 use Config::General;
 | 
| 
 | 
    48 use Tie::IxHash;
 | 
| 
 | 
    49 use FileHandle;
 | 
| 
 | 
    50 use Parallel::ForkManager;
 | 
| 
 | 
    51 
 | 
| 
 | 
    52 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
 | 
| 
 | 
    53 #PARSE THE COMMAND LINE
 | 
| 
 | 
    54 my %OPT;
 | 
| 
 | 
    55 GetOptions(\%OPT,
 | 
| 
 | 
    56 	   'conf=s',
 | 
| 
 | 
    57 	   'out1=s', #GALAXY
 | 
| 
 | 
    58 	   'out2=s', #GALAXY
 | 
| 
 | 
    59 	   'out3=s', #GALAXY
 | 
| 
 | 
    60 	   'out4=s', #GALAXY
 | 
| 
 | 
    61 	   'out5=s', #GALAXY
 | 
| 
 | 
    62 	   'l=s', #GALAXY
 | 
| 
 | 
    63 	   'N=s',#GALAXY
 | 
| 
 | 
    64 	   'help',#GALAXY
 | 
| 
 | 
    65            'man'
 | 
| 
 | 
    66 	  );
 | 
| 
 | 
    67 
 | 
| 
 | 
    68 pod2usage() if $OPT{help};
 | 
| 
 | 
    69 pod2usage(-verbose=>2) if $OPT{man};
 | 
| 
 | 
    70 pod2usage(-message=> "$!", -exitval => 2) if (!defined $OPT{conf});
 | 
| 
 | 
    71 
 | 
| 
 | 
    72 pod2usage() if(@ARGV<1);
 | 
| 
 | 
    73 
 | 
| 
 | 
    74 tie (my %func, 'Tie::IxHash',linking=>\&createlinks,
 | 
| 
 | 
    75 			     filtering=>\&filterlinks,
 | 
| 
 | 
    76 			     links2circos=>\&links2circos,
 | 
| 
 | 
    77 			     links2bed=>\&links2bed,
 | 
| 
 | 
    78 			     links2compare=>\&links2compare,
 | 
| 
 | 
    79 			     links2SV=>\&links2SV,
 | 
| 
 | 
    80 			     cnv=>\&cnv,
 | 
| 
 | 
    81 			     ratio2circos=>\&ratio2circos,
 | 
| 
 | 
    82 			     ratio2bedgraph=>\&ratio2bedgraph);
 | 
| 
 | 
    83 
 | 
| 
 | 
    84 foreach my $command (@ARGV){
 | 
| 
 | 
    85     pod2usage(-message=> "Unknown command \"$command\"", -exitval => 2) if (!defined($func{$command}));
 | 
| 
 | 
    86 }
 | 
| 
 | 
    87 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
 | 
| 
 | 
    88 
 | 
| 
 | 
    89 
 | 
| 
 | 
    90 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
 | 
| 
 | 
    91 #READ THE CONFIGURATION FILE
 | 
| 
 | 
    92 my $conf=Config::General->new(    -ConfigFile        => $OPT{conf},
 | 
| 
 | 
    93                                   -Tie => "Tie::IxHash",
 | 
| 
 | 
    94                                   -AllowMultiOptions => 1,
 | 
| 
 | 
    95 				  -LowerCaseNames    => 1,
 | 
| 
 | 
    96 				  -AutoTrue => 1);
 | 
| 
 | 
    97 my %CONF= $conf->getall;
 | 
| 
 | 
    98 validateconfiguration(\%CONF);							#validation of the configuration parameters
 | 
| 
 | 
    99 
 | 
| 
 | 
   100 my $SAMTOOLS_BIN_DIR="/bioinfo/local/samtools"; #GALAXY
 | 
| 
 | 
   101 
 | 
| 
 | 
   102 my $pt_log_file=$OPT{l}; #GALAXY
 | 
| 
 | 
   103 my $pt_links_file=$OPT{out1} if($OPT{out1}); #GALAXY
 | 
| 
 | 
   104 my $pt_flinks_file=$OPT{out2} if($OPT{out2}); #GALAXY
 | 
| 
 | 
   105 my $pt_sv_file=$OPT{out3} if($OPT{out3}); #GALAXY
 | 
| 
 | 
   106 my $pt_circos_file=$OPT{out4} if($OPT{out4}); #GALAXY
 | 
| 
 | 
   107 my $pt_bed_file=$OPT{out5} if($OPT{out5}); #GALAXY
 | 
| 
 | 
   108 
 | 
| 
 | 
   109 $CONF{general}{mates_file}=readlink($CONF{general}{mates_file});#GALAXY
 | 
| 
 | 
   110 $CONF{general}{cmap_file}=readlink($CONF{general}{cmap_file});#GALAXY
 | 
| 
 | 
   111 
 | 
| 
 | 
   112 my $log_file=$CONF{general}{output_dir}.$OPT{N}.".svdetect_run.log"; #GALAXY
 | 
| 
 | 
   113 open LOG,">$log_file" or die "$0: can't open ".$log_file.":$!\n";#GALAXY
 | 
| 
 | 
   114 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
 | 
| 
 | 
   115 
 | 
| 
 | 
   116 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
 | 
| 
 | 
   117 #COMMAND EXECUTION
 | 
| 
 | 
   118 foreach my $command (@ARGV){
 | 
| 
 | 
   119     &{$func{$command}}();
 | 
| 
 | 
   120 }
 | 
| 
 | 
   121 print LOG "-- end\n";#GALAXY
 | 
| 
 | 
   122 
 | 
| 
 | 
   123 close LOG;#GALAXY
 | 
| 
 | 
   124 system "rm $pt_log_file ; ln -s $log_file $pt_log_file"; #GALAXY
 | 
| 
 | 
   125 exit(0);
 | 
| 
 | 
   126 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
 | 
| 
 | 
   127 
 | 
| 
 | 
   128 
 | 
| 
 | 
   129 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
 | 
| 
 | 
   130 #FUNCTIONS
 | 
| 
 | 
   131 #------------------------------------------------------------------------------#
 | 
| 
 | 
   132 #MAIN FUNCTION number 1: Detection of links from mate-pairs data
 | 
| 
 | 
   133 sub createlinks{
 | 
| 
 | 
   134     
 | 
| 
 | 
   135     my %CHR;									#main hash table 1: fragments, links
 | 
| 
 | 
   136     my %CHRID;
 | 
| 
 | 
   137     my @MATEFILES;
 | 
| 
 | 
   138     
 | 
| 
 | 
   139     my $output_prefix=$CONF{general}{mates_file}.".".$CONF{general}{sv_type};
 | 
| 
 | 
   140     my @path=split(/\//,$output_prefix);
 | 
| 
 | 
   141     $output_prefix=$CONF{general}{output_dir}.$path[$#path];
 | 
| 
 | 
   142     my $tmp_mates_prefix=$CONF{general}{tmp_dir}."mates/".$path[$#path];
 | 
| 
 | 
   143     my $tmp_links_prefix=$CONF{general}{tmp_dir}."links/".$path[$#path];
 | 
| 
 | 
   144     
 | 
| 
 | 
   145     shearingChromosome(\%CHR, \%CHRID,						#making the genomic fragment library with the detection parameters
 | 
| 
 | 
   146 		     $CONF{detection}{window_size},
 | 
| 
 | 
   147 		     $CONF{detection}{step_length},
 | 
| 
 | 
   148 		     $CONF{general}{cmap_file});
 | 
| 
 | 
   149     
 | 
| 
 | 
   150     if($CONF{detection}{split_mate_file}){
 | 
| 
 | 
   151     
 | 
| 
 | 
   152 	splitMateFile(\%CHR, \%CHRID, \@MATEFILES, $tmp_mates_prefix,
 | 
| 
 | 
   153 		      $CONF{general}{sv_type},
 | 
| 
 | 
   154 		      $CONF{general}{mates_file},
 | 
| 
 | 
   155 		      $CONF{general}{input_format},
 | 
| 
 | 
   156 		      $CONF{general}{read_lengths}
 | 
| 
 | 
   157 		      );
 | 
| 
 | 
   158     }else{
 | 
| 
 | 
   159 	
 | 
| 
 | 
   160 	@MATEFILES=qx{ls $tmp_mates_prefix*} or die "# Error: No splitted mate files already created at $CONF{general}{tmp_dir} :$!";
 | 
| 
 | 
   161 	chomp(@MATEFILES);
 | 
| 
 | 
   162 	print LOG "# Splitted mate files already created.\n";
 | 
| 
 | 
   163     }
 | 
| 
 | 
   164     
 | 
| 
 | 
   165     
 | 
| 
 | 
   166     #Parallelization of the linking per chromosome for intra +  interchrs
 | 
| 
 | 
   167     my $pm = new Parallel::ForkManager($CONF{general}{num_threads});
 | 
| 
 | 
   168     
 | 
| 
 | 
   169     foreach my $matefile (@MATEFILES){
 | 
| 
 | 
   170 	
 | 
| 
 | 
   171 	my $pid = $pm->start and next;
 | 
| 
 | 
   172 	getlinks(\%CHR, \%CHRID, $matefile);
 | 
| 
 | 
   173 	$pm->finish;
 | 
| 
 | 
   174     
 | 
| 
 | 
   175     }
 | 
| 
 | 
   176     $pm->wait_all_children;
 | 
| 
 | 
   177     
 | 
| 
 | 
   178     #Merge the chromosome links file into only one
 | 
| 
 | 
   179     my @LINKFILES= qx{ls $tmp_links_prefix*links} or die "# Error: No links files created at $CONF{general}{tmp_dir} :$!";
 | 
| 
 | 
   180     chomp(@LINKFILES);
 | 
| 
 | 
   181     catFiles( \@LINKFILES => "$output_prefix.links" );
 | 
| 
 | 
   182     
 | 
| 
 | 
   183     system "rm $pt_links_file; ln -s $output_prefix.links $pt_links_file" if (defined $pt_links_file); #GALAXY
 | 
| 
 | 
   184     print LOG "# Linking end procedure : output created: $output_prefix.links\n";
 | 
| 
 | 
   185     #unlink(@LINKFILES);
 | 
| 
 | 
   186     #unlink(@MATEFILES);
 | 
| 
 | 
   187 	
 | 
| 
 | 
   188     undef %CHR;
 | 
| 
 | 
   189     undef %CHRID;
 | 
| 
 | 
   190     
 | 
| 
 | 
   191 }
 | 
| 
 | 
   192 #------------------------------------------------------------------------------#
 | 
| 
 | 
   193 #------------------------------------------------------------------------------#
 | 
| 
 | 
   194 sub getlinks {
 | 
| 
 | 
   195     
 | 
| 
 | 
   196     my ($chr,$chrID,$tmp_mates_prefix)=@_;
 | 
| 
 | 
   197     
 | 
| 
 | 
   198     my $tmp_links_prefix=$tmp_mates_prefix;
 | 
| 
 | 
   199     $tmp_links_prefix=~s/\/mates\//\/links\//;
 | 
| 
 | 
   200     
 | 
| 
 | 
   201     my %PAIR;									#main hash table 2: pairs
 | 
| 
 | 
   202     
 | 
| 
 | 
   203     linking($chr,$chrID, \%PAIR,						#creation of all links from chromosome coordinates of pairs
 | 
| 
 | 
   204 	  $CONF{general}{read_lengths},
 | 
| 
 | 
   205 	  $CONF{detection}{window_size},
 | 
| 
 | 
   206 	  $CONF{detection}{step_length},
 | 
| 
 | 
   207 	  $tmp_mates_prefix,
 | 
| 
 | 
   208 	  $CONF{general}{input_format},
 | 
| 
 | 
   209 	  $CONF{general}{sv_type},
 | 
| 
 | 
   210 	  "$tmp_links_prefix.links.mapped"
 | 
| 
 | 
   211 	  );
 | 
| 
 | 
   212     
 | 
| 
 | 
   213     getUniqueLinks("$tmp_links_prefix.links.mapped",					#remove the doublons
 | 
| 
 | 
   214 		   "$tmp_links_prefix.links.unique");
 | 
| 
 | 
   215     
 | 
| 
 | 
   216     defineCoordsLinks($chr,$chrID, \%PAIR,					#definition of the precise coordinates of links
 | 
| 
 | 
   217 		      $CONF{general}{input_format},
 | 
| 
 | 
   218 		      $CONF{general}{sv_type},
 | 
| 
 | 
   219 		      $CONF{general}{read_lengths},
 | 
| 
 | 
   220 		      "$tmp_links_prefix.links.unique",
 | 
| 
 | 
   221 		      "$tmp_links_prefix.links.unique_defined"); 
 | 
| 
 | 
   222     
 | 
| 
 | 
   223     sortLinks("$tmp_links_prefix.links.unique_defined",				#sorting links from coordinates
 | 
| 
 | 
   224 	      "$tmp_links_prefix.links.sorted");
 | 
| 
 | 
   225 
 | 
| 
 | 
   226     removeFullyOverlappedLinks("$tmp_links_prefix.links.sorted",			#remove redundant links
 | 
| 
 | 
   227 			       "$tmp_links_prefix.links",1);				#file output
 | 
| 
 | 
   228     
 | 
| 
 | 
   229    
 | 
| 
 | 
   230     undef %PAIR;
 | 
| 
 | 
   231     
 | 
| 
 | 
   232     unlink("$tmp_links_prefix.links.mapped",
 | 
| 
 | 
   233 	   "$tmp_links_prefix.links.unique",
 | 
| 
 | 
   234 	   "$tmp_links_prefix.links.unique_defined",
 | 
| 
 | 
   235 	   "$tmp_links_prefix.links.sorted");
 | 
| 
 | 
   236 }
 | 
| 
 | 
   237 #------------------------------------------------------------------------------#
 | 
| 
 | 
   238 #------------------------------------------------------------------------------#
 | 
| 
 | 
   239 sub splitMateFile{
 | 
| 
 | 
   240     
 | 
| 
 | 
   241     my ($chr,$chrID,$files_list,$output_prefix,$sv_type,$mates_file,$input_format,$tag_length)=@_;
 | 
| 
 | 
   242     
 | 
| 
 | 
   243     print LOG "# Splitting the mate file \"$mates_file\" for parallel processing...\n";
 | 
| 
 | 
   244     
 | 
| 
 | 
   245     my %filesHandle;
 | 
| 
 | 
   246     
 | 
| 
 | 
   247     #fichier matefile inter
 | 
| 
 | 
   248     if($sv_type=~/^(all|inter)$/){
 | 
| 
 | 
   249 	my $newFileName="$output_prefix.interchrs";
 | 
| 
 | 
   250 	push(@{$files_list},$newFileName);
 | 
| 
 | 
   251 	my $fh = new FileHandle;
 | 
| 
 | 
   252 	$fh->open(">$newFileName");
 | 
| 
 | 
   253 	$filesHandle{inter}=$fh;
 | 
| 
 | 
   254     }
 | 
| 
 | 
   255     
 | 
| 
 | 
   256     #fichiers matefiles intra
 | 
| 
 | 
   257     if($sv_type=~/^(all|intra)$/){
 | 
| 
 | 
   258 	foreach my $k (1..$chr->{nb_chrs}){
 | 
| 
 | 
   259 	    my $newFileName=$output_prefix.".".$chr->{$k}->{name};
 | 
| 
 | 
   260 	    push(@{$files_list},$newFileName);
 | 
| 
 | 
   261 	    my $fh = new FileHandle;
 | 
| 
 | 
   262 	    $fh->open(">$newFileName");
 | 
| 
 | 
   263 	    $filesHandle{$k}=$fh;
 | 
| 
 | 
   264 	}
 | 
| 
 | 
   265     }
 | 
| 
 | 
   266     
 | 
| 
 | 
   267     if ($mates_file =~ /.gz$/) {
 | 
| 
 | 
   268 	open(MATES, "gunzip -c $mates_file |") or die "$0: can't open ".$mates_file.":$!\n"; #gzcat
 | 
| 
 | 
   269     }elsif($mates_file =~ /.bam$/){
 | 
| 
 | 
   270 	open(MATES, "$SAMTOOLS_BIN_DIR/samtools view $mates_file |") or die "$0: can't open ".$mates_file.":$!\n";#GALAXY
 | 
| 
 | 
   271     }else{
 | 
| 
 | 
   272 	open MATES, "<".$mates_file or die "$0: can't open ".$mates_file.":$!\n";
 | 
| 
 | 
   273     }
 | 
| 
 | 
   274     
 | 
| 
 | 
   275     
 | 
| 
 | 
   276     while(<MATES>){
 | 
| 
 | 
   277 	
 | 
| 
 | 
   278 	my @t=split;								
 | 
| 
 | 
   279 	my ($chr_read1, $chr_read2, $firstbase_read1, $firstbase_read2, $end_order_read1,$end_order_read2);
 | 
| 
 | 
   280 	
 | 
| 
 | 
   281 	next if (!readMateFile(\$chr_read1, \$chr_read2, \$firstbase_read1, \$firstbase_read2, \$end_order_read1, \$end_order_read2, \@t, $input_format,$tag_length));
 | 
| 
 | 
   282 	
 | 
| 
 | 
   283 	next unless (exists $chrID->{$chr_read1} && exists $chrID->{$chr_read2});
 | 
| 
 | 
   284 	
 | 
| 
 | 
   285 	($chr_read1, $chr_read2)= ($chrID->{$chr_read1},$chrID->{$chr_read2});
 | 
| 
 | 
   286 	
 | 
| 
 | 
   287 	if( ($sv_type=~/^(all|inter)$/) && ($chr_read1 ne $chr_read2) ){
 | 
| 
 | 
   288 	    my $fh2print=$filesHandle{inter};
 | 
| 
 | 
   289 	    print $fh2print join("\t",@t)."\n";
 | 
| 
 | 
   290 	}
 | 
| 
 | 
   291 	
 | 
| 
 | 
   292 	if( ($sv_type=~/^(all|intra)$/) && ($chr_read1 eq $chr_read2) ){
 | 
| 
 | 
   293 	    my $fh2print=$filesHandle{$chr_read1};
 | 
| 
 | 
   294 	    print $fh2print join("\t",@t)."\n";
 | 
| 
 | 
   295 	    
 | 
| 
 | 
   296 	}
 | 
| 
 | 
   297     }
 | 
| 
 | 
   298     
 | 
| 
 | 
   299     foreach my $name (keys %filesHandle){
 | 
| 
 | 
   300 	my $fh=$filesHandle{$name};
 | 
| 
 | 
   301 	$fh->close;
 | 
| 
 | 
   302     }
 | 
| 
 | 
   303     
 | 
| 
 | 
   304     print LOG "# Splitted mate files of \"$mates_file\" created.\n";
 | 
| 
 | 
   305 }
 | 
| 
 | 
   306 
 | 
| 
 | 
   307 
 | 
| 
 | 
   308 #------------------------------------------------------------------------------#
 | 
| 
 | 
   309 #------------------------------------------------------------------------------#
 | 
| 
 | 
   310 sub filterlinks{
 | 
| 
 | 
   311     
 | 
| 
 | 
   312     my %CHR;
 | 
| 
 | 
   313     my %CHRID;
 | 
| 
 | 
   314     my @LINKFILES;
 | 
| 
 | 
   315     my @FLINKFILES;
 | 
| 
 | 
   316     
 | 
| 
 | 
   317     my $output_prefix=$CONF{general}{mates_file}.".".$CONF{general}{sv_type};
 | 
| 
 | 
   318     my @path=split(/\//,$output_prefix);
 | 
| 
 | 
   319     $output_prefix=$CONF{general}{output_dir}.$path[$#path];
 | 
| 
 | 
   320     my $tmp_links_prefix=$CONF{general}{tmp_dir}."links/".$path[$#path];
 | 
| 
 | 
   321     
 | 
| 
 | 
   322     createChrHashTables(\%CHR,\%CHRID,
 | 
| 
 | 
   323 			$CONF{general}{cmap_file});
 | 
| 
 | 
   324     
 | 
| 
 | 
   325     if($CONF{filtering}{split_link_file}){
 | 
| 
 | 
   326     
 | 
| 
 | 
   327 	splitLinkFile(\%CHR, \%CHRID, \@LINKFILES,
 | 
| 
 | 
   328 		      $tmp_links_prefix,
 | 
| 
 | 
   329 		      $CONF{general}{sv_type},
 | 
| 
 | 
   330 		      "$output_prefix.links",
 | 
| 
 | 
   331 		      );
 | 
| 
 | 
   332     }else{
 | 
| 
 | 
   333 	
 | 
| 
 | 
   334 	@LINKFILES=qx{ls $tmp_links_prefix*links} or die "# Error: No splitted link files already created\n";
 | 
| 
 | 
   335 	chomp(@LINKFILES);
 | 
| 
 | 
   336 	print LOG "# Splitted link files already created.\n";
 | 
| 
 | 
   337     }
 | 
| 
 | 
   338     
 | 
| 
 | 
   339     #Parallelization of the filtering per chromosome for intra +  interchrs
 | 
| 
 | 
   340     my $pm = new Parallel::ForkManager($CONF{general}{num_threads});
 | 
| 
 | 
   341     
 | 
| 
 | 
   342     foreach my $linkfile (@LINKFILES){
 | 
| 
 | 
   343 	
 | 
| 
 | 
   344 	my $pid = $pm->start and next;
 | 
| 
 | 
   345 	getFilteredlinks(\%CHR, \%CHRID, $linkfile);
 | 
| 
 | 
   346 	$pm->finish;
 | 
| 
 | 
   347     
 | 
| 
 | 
   348     }
 | 
| 
 | 
   349     $pm->wait_all_children;
 | 
| 
 | 
   350     
 | 
| 
 | 
   351     #Merge the chromosome links file into only one
 | 
| 
 | 
   352     @FLINKFILES= qx{ls $tmp_links_prefix*filtered} or die "# Error: No links files created\n";
 | 
| 
 | 
   353     chomp(@FLINKFILES);
 | 
| 
 | 
   354     catFiles( \@FLINKFILES => "$output_prefix.links.filtered" );
 | 
| 
 | 
   355     
 | 
| 
 | 
   356     system "rm $pt_flinks_file; ln -s $output_prefix.links.filtered $pt_flinks_file" if (defined $pt_flinks_file); #GALAXY
 | 
| 
 | 
   357     print LOG"# Filtering end procedure : output created: $output_prefix.links.filtered\n";
 | 
| 
 | 
   358     
 | 
| 
 | 
   359     undef %CHR;
 | 
| 
 | 
   360     undef %CHRID;
 | 
| 
 | 
   361     
 | 
| 
 | 
   362 }
 | 
| 
 | 
   363 #------------------------------------------------------------------------------#
 | 
| 
 | 
   364 #------------------------------------------------------------------------------#
 | 
| 
 | 
   365 sub splitLinkFile{
 | 
| 
 | 
   366     
 | 
| 
 | 
   367     my ($chr,$chrID,$files_list,$input_prefix,$sv_type,$link_file)=@_;
 | 
| 
 | 
   368     
 | 
| 
 | 
   369     print LOG "# Splitting the link file for parallel processing...\n";
 | 
| 
 | 
   370     
 | 
| 
 | 
   371     my %filesHandle;
 | 
| 
 | 
   372     
 | 
| 
 | 
   373     #fichier matefile inter
 | 
| 
 | 
   374     if($sv_type=~/^(all|inter)$/){
 | 
| 
 | 
   375 	my $newFileName="$input_prefix.interchrs.links";
 | 
| 
 | 
   376 	push(@{$files_list},$newFileName);
 | 
| 
 | 
   377 	my $fh = new FileHandle;
 | 
| 
 | 
   378 	$fh->open(">$newFileName");
 | 
| 
 | 
   379 	$filesHandle{inter}=$fh;
 | 
| 
 | 
   380     }
 | 
| 
 | 
   381     
 | 
| 
 | 
   382     #fichiers matefiles intra
 | 
| 
 | 
   383     if($sv_type=~/^(all|intra)$/){
 | 
| 
 | 
   384 	foreach my $k (1..$chr->{nb_chrs}){
 | 
| 
 | 
   385 	    my $newFileName=$input_prefix.".".$chr->{$k}->{name}.".links";
 | 
| 
 | 
   386 	    push(@{$files_list},$newFileName);
 | 
| 
 | 
   387 	    my $fh = new FileHandle;
 | 
| 
 | 
   388 	    $fh->open(">$newFileName");
 | 
| 
 | 
   389 	    $filesHandle{$k}=$fh;
 | 
| 
 | 
   390 	}
 | 
| 
 | 
   391     }
 | 
| 
 | 
   392     
 | 
| 
 | 
   393     open LINKS, "<".$link_file or die "$0: can't open ".$link_file.":$!\n";
 | 
| 
 | 
   394     while(<LINKS>){
 | 
| 
 | 
   395 	
 | 
| 
 | 
   396 	my @t=split;
 | 
| 
 | 
   397 	my ($chr_read1,$chr_read2)=($t[0],$t[3]);							
 | 
| 
 | 
   398 	
 | 
| 
 | 
   399 	next unless (exists $chrID->{$chr_read1} && exists $chrID->{$chr_read2});
 | 
| 
 | 
   400 	
 | 
| 
 | 
   401 	($chr_read1, $chr_read2)= ($chrID->{$chr_read1},$chrID->{$chr_read2});
 | 
| 
 | 
   402 	
 | 
| 
 | 
   403 	if( ($sv_type=~/^(all|inter)$/) && ($chr_read1 ne $chr_read2) ){
 | 
| 
 | 
   404 	    my $fh2print=$filesHandle{inter};
 | 
| 
 | 
   405 	    print $fh2print join("\t",@t)."\n";
 | 
| 
 | 
   406 	}
 | 
| 
 | 
   407 	
 | 
| 
 | 
   408 	if( ($sv_type=~/^(all|intra)$/) && ($chr_read1 eq $chr_read2) ){
 | 
| 
 | 
   409 	    my $fh2print=$filesHandle{$chr_read1};
 | 
| 
 | 
   410 	    print $fh2print join("\t",@t)."\n";
 | 
| 
 | 
   411 	    
 | 
| 
 | 
   412 	}
 | 
| 
 | 
   413     }
 | 
| 
 | 
   414     
 | 
| 
 | 
   415     foreach my $name (keys %filesHandle){
 | 
| 
 | 
   416 	my $fh=$filesHandle{$name};
 | 
| 
 | 
   417 	$fh->close;
 | 
| 
 | 
   418     }
 | 
| 
 | 
   419     
 | 
| 
 | 
   420     print LOG "# Splitted link files created.\n";
 | 
| 
 | 
   421 }
 | 
| 
 | 
   422 
 | 
| 
 | 
   423 
 | 
| 
 | 
   424 #------------------------------------------------------------------------------#
 | 
| 
 | 
   425 #------------------------------------------------------------------------------#
 | 
| 
 | 
   426 #MAIN FUNCTION number 2: Filtering processing
 | 
| 
 | 
   427 sub getFilteredlinks {
 | 
| 
 | 
   428     
 | 
| 
 | 
   429     my ($chr,$chrID,$tmp_links_prefix)=@_;
 | 
| 
 | 
   430     my %PAIR;
 | 
| 
 | 
   431 
 | 
| 
 | 
   432     strandFiltering($chr,$chrID,
 | 
| 
 | 
   433 		     $CONF{filtering}{nb_pairs_threshold},			#filtering of links
 | 
| 
 | 
   434 		     $CONF{filtering}{strand_filtering},
 | 
| 
 | 
   435 		     $CONF{filtering}{chromosomes},
 | 
| 
 | 
   436 		     $CONF{general}{input_format},
 | 
| 
 | 
   437 		     $CONF{general}{cmap_file},
 | 
| 
 | 
   438 		     $CONF{general}{mates_orientation},
 | 
| 
 | 
   439 		     $CONF{general}{read_lengths},
 | 
| 
 | 
   440 		    $tmp_links_prefix,
 | 
| 
 | 
   441 		    "$tmp_links_prefix.filtered",
 | 
| 
 | 
   442 		    ); 						
 | 
| 
 | 
   443 
 | 
| 
 | 
   444     if($CONF{filtering}{strand_filtering}){					#re-definition of links coordinates with strand filtering
 | 
| 
 | 
   445 
 | 
| 
 | 
   446 	my @tmpfiles;
 | 
| 
 | 
   447 	
 | 
| 
 | 
   448 	rename("$tmp_links_prefix.filtered","$tmp_links_prefix.filtered_unique");
 | 
| 
 | 
   449 	
 | 
| 
 | 
   450 	getUniqueLinks("$tmp_links_prefix.filtered_unique",
 | 
| 
 | 
   451 		       "$tmp_links_prefix.filtered");
 | 
| 
 | 
   452 	
 | 
| 
 | 
   453 	push(@tmpfiles,"$tmp_links_prefix.filtered_unique");
 | 
| 
 | 
   454 	
 | 
| 
 | 
   455 	if($CONF{filtering}{order_filtering}){					#filtering using the order
 | 
| 
 | 
   456 	    
 | 
| 
 | 
   457 	    rename("$tmp_links_prefix.filtered","$tmp_links_prefix.filtered_ordered");
 | 
| 
 | 
   458 	    
 | 
| 
 | 
   459 	    orderFiltering($chr,$chrID,
 | 
| 
 | 
   460 			      $CONF{filtering}{nb_pairs_threshold},
 | 
| 
 | 
   461 			      $CONF{filtering}{nb_pairs_order_threshold},			   
 | 
| 
 | 
   462 			      $CONF{filtering}{mu_length},
 | 
| 
 | 
   463 			      $CONF{filtering}{sigma_length},
 | 
| 
 | 
   464 			      $CONF{general}{mates_orientation},	      
 | 
| 
 | 
   465 			      $CONF{general}{read_lengths},
 | 
| 
 | 
   466 			      "$tmp_links_prefix.filtered_ordered", 
 | 
| 
 | 
   467 			      "$tmp_links_prefix.filtered",				
 | 
| 
 | 
   468 			     );
 | 
| 
 | 
   469 	    
 | 
| 
 | 
   470 	    push(@tmpfiles,"$tmp_links_prefix.filtered_ordered");
 | 
| 
 | 
   471 	}
 | 
| 
 | 
   472 	
 | 
| 
 | 
   473 	if (($CONF{filtering}{insert_size_filtering})&&
 | 
| 
 | 
   474 	    ($CONF{general}{sv_type} ne 'inter')){ 				 
 | 
| 
 | 
   475 	    
 | 
| 
 | 
   476 	    rename("$tmp_links_prefix.filtered","$tmp_links_prefix.filtered_withoutIndelSize");
 | 
| 
 | 
   477 	    
 | 
| 
 | 
   478 	    addInsertionInfo($chr,$chrID,
 | 
| 
 | 
   479 			     $CONF{filtering}{nb_pairs_threshold},
 | 
| 
 | 
   480 			     $CONF{filtering}{order_filtering},
 | 
| 
 | 
   481 			     $CONF{filtering}{indel_sigma_threshold},
 | 
| 
 | 
   482 			     $CONF{filtering}{dup_sigma_threshold},
 | 
| 
 | 
   483 			     $CONF{filtering}{singleton_sigma_threshold}, 
 | 
| 
 | 
   484 			     $CONF{filtering}{mu_length},
 | 
| 
 | 
   485 			     $CONF{filtering}{sigma_length},
 | 
| 
 | 
   486 			     $CONF{general}{mates_orientation},
 | 
| 
 | 
   487 			     $CONF{general}{read_lengths},
 | 
| 
 | 
   488 			     "$tmp_links_prefix.filtered_withoutIndelSize", 
 | 
| 
 | 
   489 			     "$tmp_links_prefix.filtered"			
 | 
| 
 | 
   490 			     );
 | 
| 
 | 
   491 	    
 | 
| 
 | 
   492 	    push(@tmpfiles,"$tmp_links_prefix.filtered_withoutIndelSize");    
 | 
| 
 | 
   493 	}
 | 
| 
 | 
   494 	
 | 
| 
 | 
   495 	sortLinks("$tmp_links_prefix.filtered",
 | 
| 
 | 
   496 		  "$tmp_links_prefix.filtered_sorted");
 | 
| 
 | 
   497 	
 | 
| 
 | 
   498 	removeFullyOverlappedLinks("$tmp_links_prefix.filtered_sorted",
 | 
| 
 | 
   499 			           "$tmp_links_prefix.filtered_nodup",
 | 
| 
 | 
   500 				   );				
 | 
| 
 | 
   501 	
 | 
| 
 | 
   502 	postFiltering("$tmp_links_prefix.filtered_nodup",
 | 
| 
 | 
   503 		      "$tmp_links_prefix.filtered",
 | 
| 
 | 
   504 		       $CONF{filtering}{final_score_threshold});
 | 
| 
 | 
   505 	
 | 
| 
 | 
   506 	push(@tmpfiles,"$tmp_links_prefix.filtered_sorted","$tmp_links_prefix.filtered_nodup");
 | 
| 
 | 
   507 	
 | 
| 
 | 
   508 	unlink(@tmpfiles);
 | 
| 
 | 
   509 	
 | 
| 
 | 
   510 	
 | 
| 
 | 
   511     }
 | 
| 
 | 
   512     undef %PAIR;
 | 
| 
 | 
   513     
 | 
| 
 | 
   514 }
 | 
| 
 | 
   515 #------------------------------------------------------------------------------#
 | 
| 
 | 
   516 #------------------------------------------------------------------------------#
 | 
| 
 | 
   517 #MAIN FUNCTION number 3: Circos format conversion for links
 | 
| 
 | 
   518 sub links2circos{
 | 
| 
 | 
   519     
 | 
| 
 | 
   520     my $input_file=$CONF{general}{mates_file}.".".$CONF{general}{sv_type}.".links.filtered";
 | 
| 
 | 
   521     my @path=split(/\//,$input_file);
 | 
| 
 | 
   522     $input_file=$CONF{general}{output_dir}.$path[$#path];
 | 
| 
 | 
   523     
 | 
| 
 | 
   524     my $output_file.=$input_file.".segdup.txt";
 | 
| 
 | 
   525     
 | 
| 
 | 
   526     links2segdup($CONF{circos}{organism_id},
 | 
| 
 | 
   527 		 $CONF{circos}{colorcode},
 | 
| 
 | 
   528 		 $input_file,
 | 
| 
 | 
   529 		 $output_file);							#circos file output
 | 
| 
 | 
   530     
 | 
| 
 | 
   531     system "rm $pt_circos_file; ln -s $output_file $pt_circos_file" if (defined $pt_circos_file); #GALAXY
 | 
| 
 | 
   532 }
 | 
| 
 | 
   533 #------------------------------------------------------------------------------#
 | 
| 
 | 
   534 #------------------------------------------------------------------------------#
 | 
| 
 | 
   535 #MAIN FUNCTION number 4: Bed format conversion for links
 | 
| 
 | 
   536 sub links2bed{
 | 
| 
 | 
   537     
 | 
| 
 | 
   538     my $input_file=$CONF{general}{mates_file}.".".$CONF{general}{sv_type}.".links.filtered";
 | 
| 
 | 
   539     my @path=split(/\//,$input_file);
 | 
| 
 | 
   540     $input_file=$CONF{general}{output_dir}.$path[$#path];
 | 
| 
 | 
   541     
 | 
| 
 | 
   542     my $output_file.=$input_file.".bed.txt";
 | 
| 
 | 
   543     
 | 
| 
 | 
   544     links2bedfile($CONF{general}{read_lengths},
 | 
| 
 | 
   545 		  $CONF{bed}{colorcode},
 | 
| 
 | 
   546 		  $input_file,
 | 
| 
 | 
   547 		  $output_file);						#bed file output
 | 
| 
 | 
   548     
 | 
| 
 | 
   549     system "rm $pt_bed_file; ln -s $output_file $pt_bed_file" if (defined $pt_bed_file); #GALAXY
 | 
| 
 | 
   550     
 | 
| 
 | 
   551 }
 | 
| 
 | 
   552 #------------------------------------------------------------------------------#
 | 
| 
 | 
   553 #------------------------------------------------------------------------------#
 | 
| 
 | 
   554 #MAIN FUNCTION number 6: Bed format conversion for links
 | 
| 
 | 
   555 sub links2SV{
 | 
| 
 | 
   556     
 | 
| 
 | 
   557     my $input_file=$CONF{general}{mates_file}.".".$CONF{general}{sv_type}.".links.filtered";
 | 
| 
 | 
   558     
 | 
| 
 | 
   559     my @path=split(/\//,$input_file);
 | 
| 
 | 
   560     $input_file=$CONF{general}{output_dir}.$path[$#path];
 | 
| 
 | 
   561     
 | 
| 
 | 
   562     my $output_file.=$input_file.".sv.txt";
 | 
| 
 | 
   563     
 | 
| 
 | 
   564     
 | 
| 
 | 
   565     links2SVfile( $input_file,
 | 
| 
 | 
   566 		  $output_file);
 | 
| 
 | 
   567     
 | 
| 
 | 
   568      system "rm $pt_sv_file; ln -s $output_file $pt_sv_file" if (defined $pt_sv_file); #GALAXY	
 | 
| 
 | 
   569 }
 | 
| 
 | 
   570 #------------------------------------------------------------------------------#
 | 
| 
 | 
   571 #------------------------------------------------------------------------------#
 | 
| 
 | 
   572 #MAIN FUNCTION number 7: copy number variations, coverage ratio calculation
 | 
| 
 | 
   573 sub cnv{
 | 
| 
 | 
   574     
 | 
| 
 | 
   575     my %CHR;
 | 
| 
 | 
   576     my %CHRID;
 | 
| 
 | 
   577     my @MATEFILES;
 | 
| 
 | 
   578     my @MATEFILES_REF;
 | 
| 
 | 
   579     
 | 
| 
 | 
   580     my $output_prefix=$CONF{general}{mates_file};
 | 
| 
 | 
   581     my $output_prefix_ref=$CONF{detection}{mates_file_ref};
 | 
| 
 | 
   582     my @path=split(/\//,$output_prefix);
 | 
| 
 | 
   583     my @path_ref=split(/\//,$output_prefix_ref);
 | 
| 
 | 
   584     $output_prefix=$CONF{general}{output_dir}.$path[$#path];
 | 
| 
 | 
   585     $output_prefix_ref=$CONF{general}{output_dir}.$path_ref[$#path_ref];
 | 
| 
 | 
   586     my $tmp_mates_prefix=$CONF{general}{tmp_dir}."mates/".$path[$#path];
 | 
| 
 | 
   587     my $tmp_mates_prefix_ref=$CONF{general}{tmp_dir}."mates/".$path_ref[$#path_ref];
 | 
| 
 | 
   588     my $tmp_density_prefix=$CONF{general}{tmp_dir}."density/".$path[$#path];
 | 
| 
 | 
   589     
 | 
| 
 | 
   590     shearingChromosome(\%CHR, \%CHRID,						#making the genomic fragment library with the detection parameters
 | 
| 
 | 
   591 		     $CONF{detection}{window_size},
 | 
| 
 | 
   592 		     $CONF{detection}{step_length},
 | 
| 
 | 
   593 		     $CONF{general}{cmap_file});
 | 
| 
 | 
   594     
 | 
| 
 | 
   595     if($CONF{detection}{split_mate_file}){
 | 
| 
 | 
   596     
 | 
| 
 | 
   597 	splitMateFile(\%CHR, \%CHRID, \@MATEFILES, $tmp_mates_prefix,
 | 
| 
 | 
   598 		      "intra",
 | 
| 
 | 
   599 		      $CONF{general}{mates_file},
 | 
| 
 | 
   600 		      $CONF{general}{input_format},
 | 
| 
 | 
   601 		      $CONF{general}{read_lengths}
 | 
| 
 | 
   602 		      );
 | 
| 
 | 
   603 	
 | 
| 
 | 
   604 	splitMateFile(\%CHR, \%CHRID, \@MATEFILES_REF, $tmp_mates_prefix_ref,
 | 
| 
 | 
   605 		      "intra",
 | 
| 
 | 
   606 		      $CONF{detection}{mates_file_ref},
 | 
| 
 | 
   607 		      $CONF{general}{input_format},
 | 
| 
 | 
   608 		      $CONF{general}{read_lengths}
 | 
| 
 | 
   609 		      );
 | 
| 
 | 
   610 	
 | 
| 
 | 
   611 	
 | 
| 
 | 
   612     }else{
 | 
| 
 | 
   613 	
 | 
| 
 | 
   614 	@MATEFILES=qx{ls $tmp_mates_prefix*} or die "# Error: No splitted sample mate files of \"$CONF{general}{mates_file}\" already created at $CONF{general}{tmp_dir} :$!";
 | 
| 
 | 
   615 	chomp(@MATEFILES);
 | 
| 
 | 
   616 	@MATEFILES_REF=qx{ls $tmp_mates_prefix_ref*} or die "# Error: No splitted reference mate files of \"$CONF{detection}{mates_file_ref}\" already created at $CONF{general}{tmp_dir} :$!";
 | 
| 
 | 
   617 	chomp(@MATEFILES_REF);
 | 
| 
 | 
   618 	print LOG "# Splitted sample and reference mate files already created.\n";
 | 
| 
 | 
   619     }
 | 
| 
 | 
   620     
 | 
| 
 | 
   621     #Parallelization of the cnv per chromosome
 | 
| 
 | 
   622     my $pm = new Parallel::ForkManager($CONF{general}{num_threads});
 | 
| 
 | 
   623     
 | 
| 
 | 
   624     foreach my $file (0..$#MATEFILES){
 | 
| 
 | 
   625 	
 | 
| 
 | 
   626 	my $pid = $pm->start and next;
 | 
| 
 | 
   627 
 | 
| 
 | 
   628 	densityCalculation(\%CHR, \%CHRID, $file,
 | 
| 
 | 
   629 	  $CONF{general}{read_lengths},
 | 
| 
 | 
   630 	  $CONF{detection}{window_size},
 | 
| 
 | 
   631 	  $CONF{detection}{step_length},
 | 
| 
 | 
   632 	  \@MATEFILES,
 | 
| 
 | 
   633 	  \@MATEFILES_REF,
 | 
| 
 | 
   634 	  $MATEFILES[$file].".density",
 | 
| 
 | 
   635 	  $CONF{general}{input_format});
 | 
| 
 | 
   636 
 | 
| 
 | 
   637 	$pm->finish;
 | 
| 
 | 
   638 	
 | 
| 
 | 
   639     }
 | 
| 
 | 
   640     $pm->wait_all_children;
 | 
| 
 | 
   641     
 | 
| 
 | 
   642     #Merge the chromosome links file into only one
 | 
| 
 | 
   643     my @DENSITYFILES= qx{ls $tmp_density_prefix*density} or die "# Error: No density files created at $CONF{general}{tmp_dir} :$!";
 | 
| 
 | 
   644     chomp(@DENSITYFILES);
 | 
| 
 | 
   645     catFiles( \@DENSITYFILES => "$output_prefix.density" );
 | 
| 
 | 
   646 
 | 
| 
 | 
   647     print LOG "# cnv end procedure : output created: $output_prefix.density\n";
 | 
| 
 | 
   648     
 | 
| 
 | 
   649     
 | 
| 
 | 
   650     undef %CHR;
 | 
| 
 | 
   651     undef %CHRID;
 | 
| 
 | 
   652     
 | 
| 
 | 
   653 }
 | 
| 
 | 
   654 #------------------------------------------------------------------------------#
 | 
| 
 | 
   655 #------------------------------------------------------------------------------#
 | 
| 
 | 
   656 #MAIN FUNCTION number 8: Circos format conversion for cnv ratios
 | 
| 
 | 
   657 sub ratio2circos{
 | 
| 
 | 
   658     
 | 
| 
 | 
   659     my $input_file=$CONF{general}{mates_file}.".density";
 | 
| 
 | 
   660     
 | 
| 
 | 
   661     my @path=split(/\//,$input_file);
 | 
| 
 | 
   662     $input_file=$CONF{general}{output_dir}.$path[$#path];
 | 
| 
 | 
   663     
 | 
| 
 | 
   664     my $output_file.=$input_file.".segdup.txt";
 | 
| 
 | 
   665     
 | 
| 
 | 
   666     ratio2segdup($CONF{circos}{organism_id},		
 | 
| 
 | 
   667 		 $input_file,
 | 
| 
 | 
   668 		 $output_file);
 | 
| 
 | 
   669 }
 | 
| 
 | 
   670 #------------------------------------------------------------------------------#
 | 
| 
 | 
   671 #MAIN FUNCTION number 9: BedGraph format conversion for cnv ratios
 | 
| 
 | 
   672 sub ratio2bedgraph{
 | 
| 
 | 
   673     
 | 
| 
 | 
   674     my $input_file=$CONF{general}{mates_file}.".density";
 | 
| 
 | 
   675     
 | 
| 
 | 
   676     my @path=split(/\//,$input_file);
 | 
| 
 | 
   677     $input_file=$CONF{general}{output_dir}.$path[$#path];
 | 
| 
 | 
   678     
 | 
| 
 | 
   679     my $output_file.=$input_file.".bedgraph.txt";
 | 
| 
 | 
   680     
 | 
| 
 | 
   681     ratio2bedfile($input_file,
 | 
| 
 | 
   682 		  $output_file);						#bed file output
 | 
| 
 | 
   683 }
 | 
| 
 | 
   684 #------------------------------------------------------------------------------#
 | 
| 
 | 
   685 #------------------------------------------------------------------------------#
 | 
| 
 | 
   686 #Creation of the fragment library
 | 
| 
 | 
   687 sub shearingChromosome{
 | 
| 
 | 
   688     
 | 
| 
 | 
   689     print LOG "# Making the fragments library...\n";
 | 
| 
 | 
   690     
 | 
| 
 | 
   691     my ($chr,$chrID,$window,$step,$cmap_file)=@_;				#window and step sizes parameters
 | 
| 
 | 
   692 
 | 
| 
 | 
   693     createChrHashTables($chr,$chrID,$cmap_file);				#hash tables: chromosome ID <=> chromsomes Name
 | 
| 
 | 
   694     
 | 
| 
 | 
   695     foreach my $k (1..$chr->{nb_chrs}){						
 | 
| 
 | 
   696 	
 | 
| 
 | 
   697 	print LOG"-- $chr->{$k}->{name}\n";
 | 
| 
 | 
   698 	
 | 
| 
 | 
   699 	my $frag=1;
 | 
| 
 | 
   700 	for (my $start=0; $start<$chr->{$k}->{length}; $start+=$step){
 | 
| 
 | 
   701 	    
 | 
| 
 | 
   702 	    my $end=($start<($chr->{$k}->{length})-$window)? $start+$window-1:($chr->{$k}->{length})-1;
 | 
| 
 | 
   703 	    $chr->{$k}->{$frag}=[$start,$end];					#creation of fragments, coordinates storage
 | 
| 
 | 
   704 	    
 | 
| 
 | 
   705 	    if($end==($chr->{$k}->{length})-1){
 | 
| 
 | 
   706 		$chr->{$k}->{nb_frag}=$frag;					#nb of fragments per chromosome
 | 
| 
 | 
   707 		last;
 | 
| 
 | 
   708 	    }
 | 
| 
 | 
   709 	    $frag++;
 | 
| 
 | 
   710 	}
 | 
| 
 | 
   711     }
 | 
| 
 | 
   712 }
 | 
| 
 | 
   713 #------------------------------------------------------------------------------#
 | 
| 
 | 
   714 #------------------------------------------------------------------------------#
 | 
| 
 | 
   715 #Creation of chromosome hash tables from the cmap file
 | 
| 
 | 
   716 sub createChrHashTables{
 | 
| 
 | 
   717     
 | 
| 
 | 
   718     my ($chr,$chrID,$cmap_file)=@_;
 | 
| 
 | 
   719     $chr->{nb_chrs}=0;
 | 
| 
 | 
   720     
 | 
| 
 | 
   721     open CMAP, "<".$cmap_file or die "$0: can't open ".$cmap_file.":$!\n";
 | 
| 
 | 
   722     while(<CMAP>){
 | 
| 
 | 
   723 
 | 
| 
 | 
   724 	if(/^\s+$/){ next;}
 | 
| 
 | 
   725 	my ($k,$name,$length) = split;
 | 
| 
 | 
   726 	$chr->{$k}->{name}=$name;
 | 
| 
 | 
   727 	$chr->{$k}->{length}=$length;
 | 
| 
 | 
   728 	$chrID->{$name}=$k;
 | 
| 
 | 
   729 	$chr->{nb_chrs}++;
 | 
| 
 | 
   730 	
 | 
| 
 | 
   731     }
 | 
| 
 | 
   732     close CMAP;
 | 
| 
 | 
   733 }
 | 
| 
 | 
   734 #------------------------------------------------------------------------------#
 | 
| 
 | 
   735 #------------------------------------------------------------------------------#
 | 
| 
 | 
   736 #Read the mate file according the input format file (solid, eland or sam)
 | 
| 
 | 
   737 sub readMateFile{
 | 
| 
 | 
   738     
 | 
| 
 | 
   739     my ($chr1,$chr2,$pos1,$pos2,$order1,$order2,$t,$file_type,$tag_length)=@_;
 | 
| 
 | 
   740     my ($strand1,$strand2);
 | 
| 
 | 
   741     
 | 
| 
 | 
   742     if($file_type eq "solid"){
 | 
| 
 | 
   743 	
 | 
| 
 | 
   744 	($$chr1,$$chr2,$$pos1,$$pos2,$$order1,$$order2)=($$t[6],$$t[7],$$t[8]+1,$$t[9]+1,1,2); #0-based
 | 
| 
 | 
   745 	
 | 
| 
 | 
   746     }else{
 | 
| 
 | 
   747 	my ($tag_length1,$tag_length2);
 | 
| 
 | 
   748 	($$chr1,$$chr2,$$pos1,$strand1,$$pos2,$strand2,$$order1,$$order2,$tag_length1,$tag_length2)=($$t[11],$$t[12],$$t[7],$$t[8],$$t[9],$$t[10],1,2,length($$t[1]),length($$t[2])) #1-based
 | 
| 
 | 
   749 	if($file_type eq "eland");
 | 
| 
 | 
   750 	
 | 
| 
 | 
   751 	if($file_type eq "sam"){
 | 
| 
 | 
   752 	    
 | 
| 
 | 
   753 	    return 0 if ($$t[0]=~/^@/);						#header sam filtered out
 | 
| 
 | 
   754 	   
 | 
| 
 | 
   755 	    ($$chr1,$$chr2,$$pos1,$$pos2)=($$t[2],$$t[6],$$t[3],$$t[7]);
 | 
| 
 | 
   756 	   
 | 
| 
 | 
   757 	    return 0 if (($$t[1]&0x0004) || ($$t[1]&0x0008));
 | 
| 
 | 
   758 	   
 | 
| 
 | 
   759 	    $$chr2=$$chr1 if($$chr2 eq "=");
 | 
| 
 | 
   760 	   
 | 
| 
 | 
   761 	    $strand1 = (($$t[1]&0x0010))? 'R':'F';
 | 
| 
 | 
   762 	    $strand2 = (($$t[1]&0x0020))? 'R':'F';
 | 
| 
 | 
   763 	    
 | 
| 
 | 
   764 	    $$order1=  (($$t[1]&0x0040))? '1':'2';
 | 
| 
 | 
   765 	    $$order2=  (($$t[1]&0x0080))? '1':'2';
 | 
| 
 | 
   766 	    $tag_length1 = $tag_length->{$$order1};
 | 
| 
 | 
   767 	    $tag_length2 = $tag_length->{$$order2};
 | 
| 
 | 
   768 	}
 | 
| 
 | 
   769 	
 | 
| 
 | 
   770 	$$pos1 = -($$pos1+$tag_length1) if ($strand1 eq "R");		#get sequencing starts
 | 
| 
 | 
   771 	$$pos2 = -($$pos2+$tag_length2) if ($strand2 eq "R");
 | 
| 
 | 
   772     }
 | 
| 
 | 
   773     return 1;
 | 
| 
 | 
   774 }
 | 
| 
 | 
   775 #------------------------------------------------------------------------------#
 | 
| 
 | 
   776 #------------------------------------------------------------------------------#
 | 
| 
 | 
   777 #Parsing of the mates files and creation of links between 2 chromosomal fragments
 | 
| 
 | 
   778 sub linking{
 | 
| 
 | 
   779     
 | 
| 
 | 
   780     my ($chr,$chrID,$pair,$tag_length,$window_dist,$step,$mates_file,$input_format,$sv_type,$links_file)=@_;
 | 
| 
 | 
   781     my %link;
 | 
| 
 | 
   782     
 | 
| 
 | 
   783     my $record=0;
 | 
| 
 | 
   784     my $nb_links=0;
 | 
| 
 | 
   785     my $warn=10000;
 | 
| 
 | 
   786     
 | 
| 
 | 
   787     my @sfile=split(/\./,$mates_file);
 | 
| 
 | 
   788     my $fchr=$sfile[$#sfile];
 | 
| 
 | 
   789     
 | 
| 
 | 
   790     my $fh = new FileHandle;
 | 
| 
 | 
   791    
 | 
| 
 | 
   792     print LOG "# $fchr : Linking procedure...\n";
 | 
| 
 | 
   793     print LOG "-- file=$mates_file\n".
 | 
| 
 | 
   794 	 "-- chromosome=$fchr\n".
 | 
| 
 | 
   795 	 "-- input format=$input_format\n".
 | 
| 
 | 
   796 	 "-- type=$sv_type\n".
 | 
| 
 | 
   797 	 "-- read1 length=$tag_length->{1}, read2 length=$tag_length->{2}\n".
 | 
| 
 | 
   798 	 "-- window size=$window_dist, step length=$step\n";
 | 
| 
 | 
   799     
 | 
| 
 | 
   800     if ($mates_file =~ /.gz$/) {
 | 
| 
 | 
   801 	$fh->open("gunzip -c $mates_file |") or die "$0: can't open ".$mates_file.":$!\n"; #gzcat
 | 
| 
 | 
   802     }elsif($mates_file =~ /.bam$/){
 | 
| 
 | 
   803 	$fh->open("$SAMTOOLS_BIN_DIR/samtools view $mates_file |") or die "$0: can't open ".$mates_file.":$!\n";#GALAXY
 | 
| 
 | 
   804     }else{
 | 
| 
 | 
   805 	$fh->open("<".$mates_file) or die "$0: can't open ".$mates_file.":$!\n";
 | 
| 
 | 
   806     }
 | 
| 
 | 
   807     
 | 
| 
 | 
   808     
 | 
| 
 | 
   809     while(<$fh>){
 | 
| 
 | 
   810 	
 | 
| 
 | 
   811 	my @t=split;								#for each mate-pair
 | 
| 
 | 
   812 	my $mate=$t[0];
 | 
| 
 | 
   813 	my ($chr_read1, $chr_read2, $firstbase_read1, $firstbase_read2, $end_order_read1,$end_order_read2);
 | 
| 
 | 
   814 	
 | 
| 
 | 
   815 	next if(exists $$pair{$mate});
 | 
| 
 | 
   816 	
 | 
| 
 | 
   817 	next if (!readMateFile(\$chr_read1, \$chr_read2, \$firstbase_read1, \$firstbase_read2, \$end_order_read1, \$end_order_read2, \@t, $input_format,$tag_length));
 | 
| 
 | 
   818 	
 | 
| 
 | 
   819 	next unless (exists $chrID->{$chr_read1} && exists $chrID->{$chr_read2});
 | 
| 
 | 
   820 	
 | 
| 
 | 
   821 	($chr_read1, $chr_read2)= ($chrID->{$chr_read1},$chrID->{$chr_read2});
 | 
| 
 | 
   822 	
 | 
| 
 | 
   823 	if($sv_type ne "all"){
 | 
| 
 | 
   824 	    if( ($sv_type eq "inter") && ($chr_read1 ne $chr_read2) ||
 | 
| 
 | 
   825 	        ($sv_type eq "intra") && ($chr_read1 eq $chr_read2) ){
 | 
| 
 | 
   826 	    }else{
 | 
| 
 | 
   827 		next;
 | 
| 
 | 
   828 	    }
 | 
| 
 | 
   829 	}
 | 
| 
 | 
   830 	
 | 
| 
 | 
   831 	$$pair{$mate}=[$chr_read1, $chr_read2, $firstbase_read1, $firstbase_read2, $end_order_read1, $end_order_read2 ]; 	#fill out the hash pair table (ready for the defineCoordsLinks function)
 | 
| 
 | 
   832 	
 | 
| 
 | 
   833 	$record++; 
 | 
| 
 | 
   834 	
 | 
| 
 | 
   835 	my ($coord_start_read1,$coord_end_read1,$coord_start_read2,$coord_end_read2);	#get the coordinates of each read
 | 
| 
 | 
   836 	
 | 
| 
 | 
   837 	recupCoords($firstbase_read1,\$coord_start_read1,\$coord_end_read1,$tag_length->{$end_order_read1},$input_format);
 | 
| 
 | 
   838 	recupCoords($firstbase_read2,\$coord_start_read2,\$coord_end_read2,$tag_length->{$end_order_read2},$input_format);
 | 
| 
 | 
   839 	
 | 
| 
 | 
   840 	for(my $i=1;$i<=$chr->{$chr_read1}->{'nb_frag'};$i++){			#fast genome parsing for link creation
 | 
| 
 | 
   841 	    
 | 
| 
 | 
   842 	    if (abs ($coord_start_read1-${$chr->{$chr_read1}->{$i}}[0]) <= $window_dist){
 | 
| 
 | 
   843 		
 | 
| 
 | 
   844 		if(overlap($coord_start_read1,$coord_end_read1,${$chr->{$chr_read1}->{$i}}[0],${$chr->{$chr_read1}->{$i}}[1])){
 | 
| 
 | 
   845 		    
 | 
| 
 | 
   846 		    for(my $j=1;$j<=$chr->{$chr_read2}->{'nb_frag'};$j++){
 | 
| 
 | 
   847 			
 | 
| 
 | 
   848 			if (abs ($coord_start_read2-${$chr->{$chr_read2}->{$j}}[0]) <= $window_dist) {
 | 
| 
 | 
   849 			    
 | 
| 
 | 
   850 			    if(overlap($coord_start_read2,$coord_end_read2,${$chr->{$chr_read2}->{$j}}[0],${$chr->{$chr_read2}->{$j}}[1])){
 | 
| 
 | 
   851 				
 | 
| 
 | 
   852 				makeLink(\%link,$chr_read1,$i,$chr_read2,$j,$mate,\$nb_links);	#make the link
 | 
| 
 | 
   853 			    }
 | 
| 
 | 
   854 			    
 | 
| 
 | 
   855 			}else{
 | 
| 
 | 
   856 			    
 | 
| 
 | 
   857 			    $j=getNextFrag($coord_start_read2,$j,${$chr->{$chr_read2}->{$j}}[0],$chr->{$chr_read2}->{nb_frag},$window_dist,$step);
 | 
| 
 | 
   858 			}				
 | 
| 
 | 
   859 		    }
 | 
| 
 | 
   860 		}
 | 
| 
 | 
   861 		
 | 
| 
 | 
   862 	    }else{
 | 
| 
 | 
   863 		
 | 
| 
 | 
   864 		$i=getNextFrag($coord_start_read1,$i,${$chr->{$chr_read1}->{$i}}[0],$chr->{$chr_read1}->{nb_frag},$window_dist,$step);
 | 
| 
 | 
   865 	    }
 | 
| 
 | 
   866 	}
 | 
| 
 | 
   867 	
 | 
| 
 | 
   868 	if($record>=$warn){
 | 
| 
 | 
   869 	    print LOG "-- $fchr : $warn mate-pairs analysed - $nb_links links done\n";
 | 
| 
 | 
   870 	    $warn+=10000;
 | 
| 
 | 
   871 	}
 | 
| 
 | 
   872     }
 | 
| 
 | 
   873     $fh->close;
 | 
| 
 | 
   874     
 | 
| 
 | 
   875     if(!$nb_links){
 | 
| 
 | 
   876 	print LOG "-- $fchr : No mate-pairs !\n".
 | 
| 
 | 
   877 	 "-- $fchr : No links have been found with the selected type of structural variations \($sv_type\)\n";
 | 
| 
 | 
   878     }
 | 
| 
 | 
   879     
 | 
| 
 | 
   880     print LOG "-- $fchr : Total : $record mate-pairs analysed - $nb_links links done\n";
 | 
| 
 | 
   881     
 | 
| 
 | 
   882     print LOG "-- $fchr : writing...\n";
 | 
| 
 | 
   883     
 | 
| 
 | 
   884     $fh = new FileHandle;
 | 
| 
 | 
   885     
 | 
| 
 | 
   886     $fh->open(">".$links_file) or die "$0: can't write in the output ".$links_file." :$!\n";
 | 
| 
 | 
   887     
 | 
| 
 | 
   888     foreach my $chr1 ( sort { $a <=> $b} keys %link){				#Sorted links output
 | 
| 
 | 
   889 	
 | 
| 
 | 
   890 	foreach my $chr2 ( sort { $a <=> $b} keys %{$link{$chr1}}){
 | 
| 
 | 
   891 	    
 | 
| 
 | 
   892 	    foreach my $frag1 ( sort { $a <=> $b} keys %{$link{$chr1}{$chr2}}){
 | 
| 
 | 
   893 		
 | 
| 
 | 
   894 		foreach my $frag2 ( sort { $a <=> $b} keys %{$link{$chr1}{$chr2}{$frag1}}){
 | 
| 
 | 
   895 		
 | 
| 
 | 
   896 		    my @count=split(",",$link{$chr1}{$chr2}{$frag1}{$frag2});
 | 
| 
 | 
   897 		    print $fh "$chr->{$chr1}->{name}\t".(${$chr->{$chr1}->{$frag1}}[0]+1)."\t".(${$chr->{$chr1}->{$frag1}}[1]+1)."\t".
 | 
| 
 | 
   898 				"$chr->{$chr2}->{name}\t".(${$chr->{$chr2}->{$frag2}}[0]+1)."\t".(${$chr->{$chr2}->{$frag2}}[1]+1)."\t".
 | 
| 
 | 
   899 				scalar @count."\t".				#nb of read
 | 
| 
 | 
   900 				$link{$chr1}{$chr2}{$frag1}{$frag2}."\n";	#mate list
 | 
| 
 | 
   901 		}  
 | 
| 
 | 
   902 	    }
 | 
| 
 | 
   903 	}
 | 
| 
 | 
   904     }
 | 
| 
 | 
   905     
 | 
| 
 | 
   906     $fh->close;
 | 
| 
 | 
   907     
 | 
| 
 | 
   908     undef %link;
 | 
| 
 | 
   909 
 | 
| 
 | 
   910 }
 | 
| 
 | 
   911 #------------------------------------------------------------------------------#
 | 
| 
 | 
   912 #------------------------------------------------------------------------------#
 | 
| 
 | 
   913 #remove exact links doublons according to the mate list
 | 
| 
 | 
   914 sub getUniqueLinks{
 | 
| 
 | 
   915     
 | 
| 
 | 
   916     my ($links_file,$nrlinks_file)=@_;
 | 
| 
 | 
   917     my %links;
 | 
| 
 | 
   918     my %pt;
 | 
| 
 | 
   919     my $nb_links;
 | 
| 
 | 
   920     my $n=1;
 | 
| 
 | 
   921     
 | 
| 
 | 
   922     my $record=0;
 | 
| 
 | 
   923     my $warn=300000;
 | 
| 
 | 
   924     
 | 
| 
 | 
   925     my @sfile=split(/\./,$links_file);
 | 
| 
 | 
   926     my $fchr=$sfile[$#sfile-2];
 | 
| 
 | 
   927     
 | 
| 
 | 
   928     my $fh = new FileHandle;
 | 
| 
 | 
   929 
 | 
| 
 | 
   930     print LOG "# $fchr : Getting unique links...\n";
 | 
| 
 | 
   931     $fh->open("<$links_file") or die "$0: can't open $links_file :$!\n";
 | 
| 
 | 
   932    
 | 
| 
 | 
   933     while(<$fh>){
 | 
| 
 | 
   934 	
 | 
| 
 | 
   935 	my @t=split;
 | 
| 
 | 
   936 	my $mates=$t[7];
 | 
| 
 | 
   937 	$record++;
 | 
| 
 | 
   938 	
 | 
| 
 | 
   939 	if(!exists $links{$mates}){						#Unique links selection
 | 
| 
 | 
   940 	       
 | 
| 
 | 
   941 	    $links{$mates}=[@t];
 | 
| 
 | 
   942 	    $pt{$n}=$links{$mates};
 | 
| 
 | 
   943 	    $n++;
 | 
| 
 | 
   944 	    
 | 
| 
 | 
   945 	
 | 
| 
 | 
   946 	}else{									#get the link coordinates from the mate-pairs list
 | 
| 
 | 
   947 	    
 | 
| 
 | 
   948 	    for my $i (1,2,4,5){						#get the shortest regions
 | 
| 
 | 
   949 		
 | 
| 
 | 
   950 		$links{$mates}->[$i]=($t[$i]>$links{$mates}->[$i])? $t[$i]:$links{$mates}->[$i]	#maximum start
 | 
| 
 | 
   951 		if($i==1 || $i==4);
 | 
| 
 | 
   952 		$links{$mates}->[$i]=($t[$i]<$links{$mates}->[$i])? $t[$i]:$links{$mates}->[$i]	#minimum end
 | 
| 
 | 
   953 		if($i==2 || $i==5);
 | 
| 
 | 
   954 	    }
 | 
| 
 | 
   955 	}
 | 
| 
 | 
   956 	if($record>=$warn){
 | 
| 
 | 
   957 	    print LOG "-- $fchr : $warn links analysed - ".($n-1)." unique links done\n";
 | 
| 
 | 
   958 	    $warn+=300000;
 | 
| 
 | 
   959 	}
 | 
| 
 | 
   960     }
 | 
| 
 | 
   961     $fh->close;
 | 
| 
 | 
   962     
 | 
| 
 | 
   963     $nb_links=$n-1;
 | 
| 
 | 
   964     print LOG "-- $fchr : Total : $record links analysed - $nb_links unique links done\n";
 | 
| 
 | 
   965     
 | 
| 
 | 
   966     $fh = new FileHandle;
 | 
| 
 | 
   967     $fh->open(">$nrlinks_file") or die "$0: can't write in the output: $nrlinks_file :$!\n";
 | 
| 
 | 
   968     print LOG "-- $fchr : writing...\n";
 | 
| 
 | 
   969     for my $i (1..$nb_links){
 | 
| 
 | 
   970 	
 | 
| 
 | 
   971 	print $fh join("\t",@{$pt{$i}})."\n";				#all links output
 | 
| 
 | 
   972     }
 | 
| 
 | 
   973     
 | 
| 
 | 
   974     $fh->close;
 | 
| 
 | 
   975     
 | 
| 
 | 
   976     undef %links;
 | 
| 
 | 
   977     undef %pt;
 | 
| 
 | 
   978     
 | 
| 
 | 
   979 }
 | 
| 
 | 
   980 #------------------------------------------------------------------------------#
 | 
| 
 | 
   981 #------------------------------------------------------------------------------#
 | 
| 
 | 
   982 #get the new coordinates of each link from the mate list
 | 
| 
 | 
   983 sub defineCoordsLinks{
 | 
| 
 | 
   984     
 | 
| 
 | 
   985     my ($chr,$chrID,$pair,$input_format,$sv_type,$tag_length,$links_file,$clinks_file)=@_;
 | 
| 
 | 
   986     
 | 
| 
 | 
   987     my @sfile=split(/\./,$links_file);
 | 
| 
 | 
   988     my $fchr=$sfile[$#sfile-2];
 | 
| 
 | 
   989     
 | 
| 
 | 
   990     my $fh = new FileHandle;
 | 
| 
 | 
   991     my $fh2 = new FileHandle;
 | 
| 
 | 
   992     
 | 
| 
 | 
   993     $fh->open("<$links_file") or die "$0: can't open $links_file :$!\n";
 | 
| 
 | 
   994     $fh2->open(">$clinks_file") or die "$0: can't write in the output: $clinks_file :$!\n";
 | 
| 
 | 
   995     
 | 
| 
 | 
   996     print LOG "# $fchr : Defining precise link coordinates...\n";
 | 
| 
 | 
   997     
 | 
| 
 | 
   998     my $record=0;
 | 
| 
 | 
   999     my $warn=100000;
 | 
| 
 | 
  1000     
 | 
| 
 | 
  1001     my %coords;
 | 
| 
 | 
  1002     my %strands;
 | 
| 
 | 
  1003     my %order;
 | 
| 
 | 
  1004     my %ends_order;
 | 
| 
 | 
  1005     
 | 
| 
 | 
  1006     while(<$fh>){
 | 
| 
 | 
  1007 	
 | 
| 
 | 
  1008 
 | 
| 
 | 
  1009 	my ($col1,$col2)=(1,2);							#for an intrachromosomal link
 | 
| 
 | 
  1010 	my $diffchr=0;								#difference between chr1 and chr2
 | 
| 
 | 
  1011 	my ($chr1,$chr2,$mates_list,$npairs)=(split)[0,3,7,8];
 | 
| 
 | 
  1012 	($chr1,$chr2) = ($chrID->{$chr1},$chrID->{$chr2});
 | 
| 
 | 
  1013 	if ($chr1 != $chr2){							#for an interchromosomal link
 | 
| 
 | 
  1014 	    $col1=$col2=0;							#no distinction
 | 
| 
 | 
  1015 	    $diffchr=1;
 | 
| 
 | 
  1016 	}
 | 
| 
 | 
  1017 	
 | 
| 
 | 
  1018 	my @pairs=split(",",$mates_list);
 | 
| 
 | 
  1019 	
 | 
| 
 | 
  1020 	$coords{$col1}{$chr1}->{start}=undef;
 | 
| 
 | 
  1021 	$coords{$col1}{$chr1}->{end}=undef;
 | 
| 
 | 
  1022 	$coords{$col2}{$chr2}->{start}=undef;
 | 
| 
 | 
  1023 	$coords{$col2}{$chr2}->{end}=undef;
 | 
| 
 | 
  1024 	$strands{$col1}{$chr1}=undef;
 | 
| 
 | 
  1025 	$strands{$col2}{$chr2}=undef;
 | 
| 
 | 
  1026 	$ends_order{$col1}{$chr1}=undef;
 | 
| 
 | 
  1027 	$ends_order{$col2}{$chr2}=undef;
 | 
| 
 | 
  1028 	
 | 
| 
 | 
  1029 	
 | 
| 
 | 
  1030 	$order{$col1}{$chr1}->{index}->{1}=undef;
 | 
| 
 | 
  1031 	$order{$col1}{$chr1}->{index}->{2}=undef;
 | 
| 
 | 
  1032 	$order{$col2}{$chr2}->{index}->{1}=undef;
 | 
| 
 | 
  1033 	$order{$col2}{$chr2}->{index}->{2}=undef;
 | 
| 
 | 
  1034 	$order{$col1}{$chr1}->{order}=undef;
 | 
| 
 | 
  1035 	$order{$col2}{$chr2}->{order}=undef;
 | 
| 
 | 
  1036 	
 | 
| 
 | 
  1037 	$record++;
 | 
| 
 | 
  1038 
 | 
| 
 | 
  1039 	for my $p (0..$#pairs){							#for each pair
 | 
| 
 | 
  1040 	    
 | 
| 
 | 
  1041 	    my ($coord_start_read1,$coord_end_read1);
 | 
| 
 | 
  1042 	    my ($coord_start_read2,$coord_end_read2);
 | 
| 
 | 
  1043 	    my $strand_read1=recupCoords(${$$pair{$pairs[$p]}}[2],\$coord_start_read1,\$coord_end_read1,$tag_length->{${$$pair{$pairs[$p]}}[4]},$input_format);
 | 
| 
 | 
  1044 	    my $strand_read2=recupCoords(${$$pair{$pairs[$p]}}[3],\$coord_start_read2,\$coord_end_read2,$tag_length->{${$$pair{$pairs[$p]}}[5]},$input_format);
 | 
| 
 | 
  1045 
 | 
| 
 | 
  1046 	    if(!$diffchr){							#for a intrachromosomal link
 | 
| 
 | 
  1047 		if($coord_start_read2<$coord_start_read1){				#get the closer start coordinate for each column
 | 
| 
 | 
  1048 		    ($col1,$col2)=(2,1);
 | 
| 
 | 
  1049 		}else{
 | 
| 
 | 
  1050 		    ($col1,$col2)=(1,2);
 | 
| 
 | 
  1051 		}
 | 
| 
 | 
  1052 	    }
 | 
| 
 | 
  1053 	    
 | 
| 
 | 
  1054 	    push(@{$coords{$col1}{${$$pair{$pairs[$p]}}[0]}->{start}},$coord_start_read1);	#get coords and strands of f3 and r3 reads
 | 
| 
 | 
  1055 	    push(@{$coords{$col1}{${$$pair{$pairs[$p]}}[0]}->{end}},$coord_end_read1);
 | 
| 
 | 
  1056 	    push(@{$coords{$col2}{${$$pair{$pairs[$p]}}[1]}->{start}},$coord_start_read2);
 | 
| 
 | 
  1057 	    push(@{$coords{$col2}{${$$pair{$pairs[$p]}}[1]}->{end}},$coord_end_read2);
 | 
| 
 | 
  1058 	    push(@{$strands{$col1}{${$$pair{$pairs[$p]}}[0]}},$strand_read1);
 | 
| 
 | 
  1059 	    push(@{$strands{$col2}{${$$pair{$pairs[$p]}}[1]}},$strand_read2);
 | 
| 
 | 
  1060 	    push(@{$ends_order{$col1}{${$$pair{$pairs[$p]}}[0]}},${$$pair{$pairs[$p]}}[4]);
 | 
| 
 | 
  1061 	    push(@{$ends_order{$col2}{${$$pair{$pairs[$p]}}[1]}},${$$pair{$pairs[$p]}}[5]);
 | 
| 
 | 
  1062 	}
 | 
| 
 | 
  1063 	
 | 
| 
 | 
  1064 	($col1,$col2)=(1,2) if(!$diffchr);
 | 
| 
 | 
  1065 	
 | 
| 
 | 
  1066 	my $coord_start_chr1=min(min(@{$coords{$col1}{$chr1}->{start}}),min(@{$coords{$col1}{$chr1}->{end}}));		#get the biggest region
 | 
| 
 | 
  1067 	my $coord_end_chr1=max(max(@{$coords{$col1}{$chr1}->{start}}),max(@{$coords{$col1}{$chr1}->{end}}));		
 | 
| 
 | 
  1068 	my $coord_start_chr2=min(min(@{$coords{$col2}{$chr2}->{start}}),min(@{$coords{$col2}{$chr2}->{end}}));	
 | 
| 
 | 
  1069 	my $coord_end_chr2=max(max(@{$coords{$col2}{$chr2}->{start}}),max(@{$coords{$col2}{$chr2}->{end}}));
 | 
| 
 | 
  1070 	
 | 
| 
 | 
  1071 	@{$order{$col1}{$chr1}->{index}->{1}}= sort {${$coords{$col1}{$chr1}->{start}}[$a] <=>  ${$coords{$col1}{$chr1}->{start}}[$b]} 0 .. $#{$coords{$col1}{$chr1}->{start}};
 | 
| 
 | 
  1072 	@{$order{$col2}{$chr2}->{index}->{1}}= sort {${$coords{$col2}{$chr2}->{start}}[$a] <=>  ${$coords{$col2}{$chr2}->{start}}[$b]} 0 .. $#{$coords{$col2}{$chr2}->{start}};
 | 
| 
 | 
  1073 	
 | 
| 
 | 
  1074 	foreach my $i (@{$order{$col1}{$chr1}->{index}->{1}}){			#get the rank of the chr2 reads according to the sorted chr1 reads (start coordinate sorting)
 | 
| 
 | 
  1075 	    foreach my $j (@{$order{$col2}{$chr2}->{index}->{1}}){
 | 
| 
 | 
  1076 		
 | 
| 
 | 
  1077 		if(${$order{$col1}{$chr1}->{index}->{1}}[$i] == ${$order{$col2}{$chr2}->{index}->{1}}[$j]){
 | 
| 
 | 
  1078 		    ${$order{$col1}{$chr1}->{index}->{2}}[$i]=$i;
 | 
| 
 | 
  1079 		    ${$order{$col2}{$chr2}->{index}->{2}}[$i]=$j;
 | 
| 
 | 
  1080 		    last;
 | 
| 
 | 
  1081 		}
 | 
| 
 | 
  1082 	    }
 | 
| 
 | 
  1083 	}
 | 
| 
 | 
  1084 	
 | 
| 
 | 
  1085 	foreach my $i (@{$order{$col1}{$chr1}->{index}->{2}}){			#use rank chr1 as an ID
 | 
| 
 | 
  1086 	    foreach my $j (@{$order{$col2}{$chr2}->{index}->{2}}){
 | 
| 
 | 
  1087 		
 | 
| 
 | 
  1088 		if(${$order{$col1}{$chr1}->{index}->{2}}[$i] == ${$order{$col2}{$chr2}->{index}->{2}}[$j]){
 | 
| 
 | 
  1089 		    ${$order{$col1}{$chr1}->{order}}[$i]=$i+1;
 | 
| 
 | 
  1090 		    ${$order{$col2}{$chr2}->{order}}[$i]=$j+1;
 | 
| 
 | 
  1091 		    last;
 | 
| 
 | 
  1092 		}
 | 
| 
 | 
  1093 	    }
 | 
| 
 | 
  1094 	}
 | 
| 
 | 
  1095 	
 | 
| 
 | 
  1096 	@pairs=sortTablebyIndex(\@{$order{$col1}{$chr1}->{index}->{1}},\@pairs);#sorting of the pairs, strands, and start coords from the sorted chr2 reads
 | 
| 
 | 
  1097 	@{$strands{$col1}{$chr1}}=sortTablebyIndex(\@{$order{$col1}{$chr1}->{index}->{1}},$strands{$col1}{$chr1});
 | 
| 
 | 
  1098 	@{$strands{$col2}{$chr2}}=sortTablebyIndex(\@{$order{$col1}{$chr1}->{index}->{1}},$strands{$col2}{$chr2});
 | 
| 
 | 
  1099 	@{$ends_order{$col1}{$chr1}}=sortTablebyIndex(\@{$order{$col1}{$chr1}->{index}->{1}},$ends_order{$col1}{$chr1});
 | 
| 
 | 
  1100 	@{$ends_order{$col2}{$chr2}}=sortTablebyIndex(\@{$order{$col1}{$chr1}->{index}->{1}},$ends_order{$col2}{$chr2});
 | 
| 
 | 
  1101 	@{$coords{$col1}{$chr1}->{start}}=sortTablebyIndex(\@{$order{$col1}{$chr1}->{index}->{1}},$coords{$col1}{$chr1}->{start});
 | 
| 
 | 
  1102 	@{$coords{$col2}{$chr2}->{start}}=sortTablebyIndex(\@{$order{$col1}{$chr1}->{index}->{1}},$coords{$col2}{$chr2}->{start});
 | 
| 
 | 
  1103 	
 | 
| 
 | 
  1104 	
 | 
| 
 | 
  1105 	my @link=($chr->{$chr1}->{name}, $coord_start_chr1 , $coord_end_chr1,	#all information output
 | 
| 
 | 
  1106 		  $chr->{$chr2}->{name}, $coord_start_chr2 , $coord_end_chr2,
 | 
| 
 | 
  1107 		  scalar @pairs,
 | 
| 
 | 
  1108 		  join(",",@pairs),
 | 
| 
 | 
  1109 		  join(",",@{$strands{$col1}{$chr1}}),
 | 
| 
 | 
  1110 		  join(",",@{$strands{$col2}{$chr2}}),
 | 
| 
 | 
  1111 		  join(",",@{$ends_order{$col1}{$chr1}}),
 | 
| 
 | 
  1112 		  join(",",@{$ends_order{$col2}{$chr2}}),
 | 
| 
 | 
  1113 		  join(",",@{$order{$col1}{$chr1}->{order}}),
 | 
| 
 | 
  1114 		  join(",",@{$order{$col2}{$chr2}->{order}}),
 | 
| 
 | 
  1115 		  join(",",@{$coords{$col1}{$chr1}->{start}}),
 | 
| 
 | 
  1116 		  join(",",@{$coords{$col2}{$chr2}->{start}}));
 | 
| 
 | 
  1117 	
 | 
| 
 | 
  1118 	print $fh2 join("\t",@link)."\n";
 | 
| 
 | 
  1119 	
 | 
| 
 | 
  1120 	if($record>=$warn){
 | 
| 
 | 
  1121 	    print LOG "-- $fchr : $warn links processed\n";
 | 
| 
 | 
  1122 	    $warn+=100000;
 | 
| 
 | 
  1123 	}
 | 
| 
 | 
  1124     }
 | 
| 
 | 
  1125     $fh->close;
 | 
| 
 | 
  1126     $fh2->close;
 | 
| 
 | 
  1127     
 | 
| 
 | 
  1128     print LOG "-- $fchr : Total : $record links processed\n";
 | 
| 
 | 
  1129     
 | 
| 
 | 
  1130 }
 | 
| 
 | 
  1131 #------------------------------------------------------------------------------#
 | 
| 
 | 
  1132 #------------------------------------------------------------------------------#
 | 
| 
 | 
  1133 #Sort links according the concerned chromosomes and their coordinates
 | 
| 
 | 
  1134 sub sortLinks{
 | 
| 
 | 
  1135     
 | 
| 
 | 
  1136     my ($links_file,$sortedlinks_file,$unique)=@_;
 | 
| 
 | 
  1137     
 | 
| 
 | 
  1138     my @sfile=split(/\./,$links_file);
 | 
| 
 | 
  1139     my $fchr=$sfile[$#sfile-2];
 | 
| 
 | 
  1140     
 | 
| 
 | 
  1141     
 | 
| 
 | 
  1142     print LOG "# $fchr : Sorting links...\n";
 | 
| 
 | 
  1143     
 | 
| 
 | 
  1144     my $pipe=($unique)? "| sort -u":"";
 | 
| 
 | 
  1145     system "sort -k 1,1 -k 4,4 -k 2,2n -k 5,5n -k 8,8n $links_file $pipe > $sortedlinks_file";
 | 
| 
 | 
  1146 
 | 
| 
 | 
  1147 }
 | 
| 
 | 
  1148 #------------------------------------------------------------------------------#
 | 
| 
 | 
  1149 #------------------------------------------------------------------------------#
 | 
| 
 | 
  1150 #removal of fully overlapped links
 | 
| 
 | 
  1151 sub removeFullyOverlappedLinks{
 | 
| 
 | 
  1152     
 | 
| 
 | 
  1153     my ($links_file,$nrlinks_file,$warn_out)=@_;
 | 
| 
 | 
  1154     
 | 
| 
 | 
  1155     my %pt;
 | 
| 
 | 
  1156     my $n=1;
 | 
| 
 | 
  1157     
 | 
| 
 | 
  1158     my @sfile=split(/\./,$links_file);
 | 
| 
 | 
  1159     my $fchr=$sfile[$#sfile-2];
 | 
| 
 | 
  1160     
 | 
| 
 | 
  1161     my $fh = new FileHandle;
 | 
| 
 | 
  1162     
 | 
| 
 | 
  1163     $fh->open("<$links_file") or die "$0: can't open $links_file :$!\n";
 | 
| 
 | 
  1164     while(<$fh>){
 | 
| 
 | 
  1165 	
 | 
| 
 | 
  1166 	my @t=split("\t",$_);
 | 
| 
 | 
  1167 	$pt{$n}=[@t];
 | 
| 
 | 
  1168 	$n++;
 | 
| 
 | 
  1169     }
 | 
| 
 | 
  1170     $fh->close;
 | 
| 
 | 
  1171     
 | 
| 
 | 
  1172     my $nb_links=$n-1;
 | 
| 
 | 
  1173     my $nb=$nb_links;
 | 
| 
 | 
  1174 
 | 
| 
 | 
  1175     my %pt2;
 | 
| 
 | 
  1176     my $nb2=1;
 | 
| 
 | 
  1177     my $record=0;
 | 
| 
 | 
  1178     my $warn=10000;
 | 
| 
 | 
  1179     
 | 
| 
 | 
  1180     print LOG "# $fchr : Removing fully overlapped links...\n";
 | 
| 
 | 
  1181     
 | 
| 
 | 
  1182     LINK:
 | 
| 
 | 
  1183 
 | 
| 
 | 
  1184     for my $i (1..$nb){
 | 
| 
 | 
  1185 	    
 | 
| 
 | 
  1186 	my @link=();
 | 
| 
 | 
  1187 	my @next_link=();
 | 
| 
 | 
  1188 	my $ind1=$i;
 | 
| 
 | 
  1189 	
 | 
| 
 | 
  1190 	$record++;
 | 
| 
 | 
  1191 	if($record>=$warn){
 | 
| 
 | 
  1192 	    print LOG "-- $fchr : $warn unique links analysed - ".($nb2-1)." non-overlapped links done\n";
 | 
| 
 | 
  1193 	    $warn+=10000;
 | 
| 
 | 
  1194 	}
 | 
| 
 | 
  1195 	
 | 
| 
 | 
  1196 	if(exists $pt{$ind1}){
 | 
| 
 | 
  1197 	    @link=@{$pt{$ind1}};                                                #link1
 | 
| 
 | 
  1198 	}else{
 | 
| 
 | 
  1199 	    next LINK;
 | 
| 
 | 
  1200 	}
 | 
| 
 | 
  1201 	
 | 
| 
 | 
  1202 	my ($chr1,$start1,$end1,$chr2,$start2,$end2)=($link[0],$link[1],$link[2],$link[3],$link[4],$link[5]);	#get info of link1
 | 
| 
 | 
  1203 	my @mates=deleteBadOrderSensePairs(split(",",$link[7]));
 | 
| 
 | 
  1204     
 | 
| 
 | 
  1205 	my $ind2=$ind1+1;
 | 
| 
 | 
  1206 	$ind2++ while (!exists $pt{$ind2}&& $ind2<=$nb);			#get the next found link
 | 
| 
 | 
  1207 	
 | 
| 
 | 
  1208 	if($ind2<=$nb){
 | 
| 
 | 
  1209 	    
 | 
| 
 | 
  1210 	    @next_link=@{$pt{$ind2}};					        #link2
 | 
| 
 | 
  1211 	    my ($chr3,$start3,$end3,$chr4,$start4,$end4)=($next_link[0],$next_link[1],$next_link[2],$next_link[3],$next_link[4],$next_link[5]); #get info of link2
 | 
| 
 | 
  1212 	    my @next_mates=deleteBadOrderSensePairs(split(",",$next_link[7]));
 | 
| 
 | 
  1213 	  
 | 
| 
 | 
  1214 	    while(($chr1 eq $chr3 && $chr2 eq $chr4) && overlap($start1,$end1,$start3,$end3)){	#loop here according to the chr1 coordinates, need an overlap between links to enter
 | 
| 
 | 
  1215 		    
 | 
| 
 | 
  1216 		if(!overlap($start2,$end2,$start4,$end4)){			#if no overlap with chr2 coordinates ->next link2
 | 
| 
 | 
  1217 		    
 | 
| 
 | 
  1218 		    $ind2++;
 | 
| 
 | 
  1219 		    $ind2++ while (!exists $pt{$ind2}&& $ind2<=$nb);
 | 
| 
 | 
  1220 		   
 | 
| 
 | 
  1221 		    if($ind2>$nb){						#if no more link in the file -> save link1
 | 
| 
 | 
  1222 			
 | 
| 
 | 
  1223 			$pt2{$nb2}=\@link;
 | 
| 
 | 
  1224 			$nb2++;
 | 
| 
 | 
  1225 			next LINK;
 | 
| 
 | 
  1226 		    }
 | 
| 
 | 
  1227 		    
 | 
| 
 | 
  1228 		    @next_link=@{$pt{$ind2}};
 | 
| 
 | 
  1229 		    ($chr3,$start3,$end3,$chr4,$start4,$end4)=($next_link[0],$next_link[1],$next_link[2],$next_link[3],$next_link[4],$next_link[5]);
 | 
| 
 | 
  1230 		    @next_mates=deleteBadOrderSensePairs(split(",",$next_link[7]));
 | 
| 
 | 
  1231 		    next;
 | 
| 
 | 
  1232 		}
 | 
| 
 | 
  1233 		
 | 
| 
 | 
  1234 		my %mates=map{$_ =>1} @mates;					#get the equal number of mates
 | 
| 
 | 
  1235 		my @same_mates = grep( $mates{$_}, @next_mates );
 | 
| 
 | 
  1236 		my $nb_mates= scalar @same_mates;
 | 
| 
 | 
  1237 	      
 | 
| 
 | 
  1238 		if($nb_mates == scalar @mates){
 | 
| 
 | 
  1239 		    
 | 
| 
 | 
  1240 		    delete $pt{$ind1};						#if pairs of link 1 are all included in link 2 -> delete link1
 | 
| 
 | 
  1241 		    next LINK;							#go to link2, link2 becomes link1
 | 
| 
 | 
  1242 		    
 | 
| 
 | 
  1243 		}else{
 | 
| 
 | 
  1244 			delete $pt{$ind2} if($nb_mates == scalar @next_mates);	#if pairs of link2 are all included in link 1 -> delete link2
 | 
| 
 | 
  1245 			$ind2++;							#we continue by checking the next link2
 | 
| 
 | 
  1246 			$ind2++ while (!exists $pt{$ind2}&& $ind2<=$nb);
 | 
| 
 | 
  1247 			
 | 
| 
 | 
  1248 			if($ind2>$nb){						#if no more link in the file -> save link1
 | 
| 
 | 
  1249 			  
 | 
| 
 | 
  1250 			    $pt2{$nb2}=\@link;
 | 
| 
 | 
  1251 			    $nb2++;
 | 
| 
 | 
  1252 			    next LINK;
 | 
| 
 | 
  1253 			}
 | 
| 
 | 
  1254 		    
 | 
| 
 | 
  1255 			@next_link=@{$pt{$ind2}};					#get info of link2
 | 
| 
 | 
  1256 			($chr3,$start3,$end3,$chr4,$start4,$end4)=($next_link[0],$next_link[1],$next_link[2],$next_link[3],$next_link[4],$next_link[5]);
 | 
| 
 | 
  1257 			@next_mates=deleteBadOrderSensePairs(split(",",$next_link[7]));
 | 
| 
 | 
  1258 			
 | 
| 
 | 
  1259 		}
 | 
| 
 | 
  1260 	    }
 | 
| 
 | 
  1261 	}
 | 
| 
 | 
  1262 	$pt2{$nb2}=\@link;							#if no (more) link with chr1 coordinates overlap -> save link1
 | 
| 
 | 
  1263 	$nb2++;
 | 
| 
 | 
  1264     }																																				
 | 
| 
 | 
  1265    
 | 
| 
 | 
  1266     print LOG "-- $fchr : Total : $nb_links unique links analysed - ".($nb2-1)." non-overlapped links done\n";
 | 
| 
 | 
  1267     
 | 
| 
 | 
  1268     #OUTPUT
 | 
| 
 | 
  1269     
 | 
| 
 | 
  1270     $fh = new FileHandle;
 | 
| 
 | 
  1271     $fh->open(">$nrlinks_file") or die "$0: can't write in the output: $nrlinks_file :$!\n";
 | 
| 
 | 
  1272     print LOG "-- $fchr :  writing...\n";
 | 
| 
 | 
  1273     for my $i (1..$nb2-1){
 | 
| 
 | 
  1274 	
 | 
| 
 | 
  1275 	print $fh join("\t",@{$pt2{$i}});				#all links output
 | 
| 
 | 
  1276     }
 | 
| 
 | 
  1277     
 | 
| 
 | 
  1278     close $fh;
 | 
| 
 | 
  1279     
 | 
| 
 | 
  1280     print LOG "-- $fchr : output created: $nrlinks_file\n" if($warn_out);
 | 
| 
 | 
  1281     
 | 
| 
 | 
  1282     undef %pt;
 | 
| 
 | 
  1283     undef %pt2;
 | 
| 
 | 
  1284 }
 | 
| 
 | 
  1285 #------------------------------------------------------------------------------#
 | 
| 
 | 
  1286 sub postFiltering {
 | 
| 
 | 
  1287     
 | 
| 
 | 
  1288     my ($links_file,$pflinks_file, $finalScore_thres)=@_;
 | 
| 
 | 
  1289     
 | 
| 
 | 
  1290     my @sfile=split(/\./,$links_file);
 | 
| 
 | 
  1291     my $fchr=$sfile[$#sfile-2];
 | 
| 
 | 
  1292     
 | 
| 
 | 
  1293     
 | 
| 
 | 
  1294     my ($nb,$nb2)=(0,0);
 | 
| 
 | 
  1295     
 | 
| 
 | 
  1296     print LOG "# $fchr : Post-filtering links...\n";
 | 
| 
 | 
  1297     print LOG "-- $fchr : final score threshold = $finalScore_thres\n";
 | 
| 
 | 
  1298     
 | 
| 
 | 
  1299     my $fh = new FileHandle;
 | 
| 
 | 
  1300     my $fh2 = new FileHandle;
 | 
| 
 | 
  1301     
 | 
| 
 | 
  1302     $fh->open("<$links_file") or die "$0: can't open $links_file :$!\n";
 | 
| 
 | 
  1303     $fh2->open(">$pflinks_file") or die "$0: can't write in the output: $pflinks_file :$!\n";
 | 
| 
 | 
  1304     
 | 
| 
 | 
  1305     
 | 
| 
 | 
  1306     while(<$fh>){
 | 
| 
 | 
  1307 	
 | 
| 
 | 
  1308 	my @t=split("\t",$_);
 | 
| 
 | 
  1309 	my $score=$t[$#t-1];
 | 
| 
 | 
  1310 	
 | 
| 
 | 
  1311 	if($score >= $finalScore_thres){
 | 
| 
 | 
  1312 	    print $fh2 join("\t", @t);
 | 
| 
 | 
  1313 	    $nb2++;
 | 
| 
 | 
  1314 	}
 | 
| 
 | 
  1315 	$nb++;
 | 
| 
 | 
  1316     }
 | 
| 
 | 
  1317     $fh->close;
 | 
| 
 | 
  1318     $fh2->close;
 | 
| 
 | 
  1319     
 | 
| 
 | 
  1320     print LOG "-- $fchr : Total : $nb unique links analysed - $nb2 links kept\n";
 | 
| 
 | 
  1321     print LOG "-- $fchr : output created: $pflinks_file\n";
 | 
| 
 | 
  1322 }
 | 
| 
 | 
  1323 
 | 
| 
 | 
  1324 
 | 
| 
 | 
  1325 
 | 
| 
 | 
  1326 #------------------------------------------------------------------------------#
 | 
| 
 | 
  1327 #------------------------------------------------------------------------------#
 | 
| 
 | 
  1328 #Filtering of the links
 | 
| 
 | 
  1329 sub strandFiltering{
 | 
| 
 | 
  1330     
 | 
| 
 | 
  1331     my($chr,$chrID,$pairs_threshold,$strand_filtering,$chromosomes,
 | 
| 
 | 
  1332        $input_format,$cmap_file,$mate_sense, $tag_length,$links_file,$flinks_file)=@_;
 | 
| 
 | 
  1333     
 | 
| 
 | 
  1334     my @sfile=split(/\./,$links_file);
 | 
| 
 | 
  1335     my $fchr=$sfile[$#sfile-1];
 | 
| 
 | 
  1336     
 | 
| 
 | 
  1337     
 | 
| 
 | 
  1338     my %chrs;
 | 
| 
 | 
  1339     my %chrs1;
 | 
| 
 | 
  1340     my %chrs2;
 | 
| 
 | 
  1341     my $nb_chrs;
 | 
| 
 | 
  1342     my $exclude;
 | 
| 
 | 
  1343     
 | 
| 
 | 
  1344     if($chromosomes){
 | 
| 
 | 
  1345 	my @chrs=split(",",$chromosomes);
 | 
| 
 | 
  1346 	$nb_chrs=scalar @chrs;
 | 
| 
 | 
  1347 	$exclude=($chrs[0]=~/^\-/)? 1:0;
 | 
| 
 | 
  1348 	for my $chrName (@chrs){
 | 
| 
 | 
  1349 	    $chrName=~s/^(\-)//;
 | 
| 
 | 
  1350 	    my $col=($chrName=~s/_(1|2)$//);
 | 
| 
 | 
  1351 	    
 | 
| 
 | 
  1352 	    if(!$col){
 | 
| 
 | 
  1353 		$chrs{$chrID->{$chrName}}=undef 
 | 
| 
 | 
  1354 	    }else{
 | 
| 
 | 
  1355 		$chrs1{$chrID->{$chrName}}=undef if($1==1);
 | 
| 
 | 
  1356 		$chrs2{$chrID->{$chrName}}=undef if($1==2);
 | 
| 
 | 
  1357 	    }
 | 
| 
 | 
  1358 	}
 | 
| 
 | 
  1359     }
 | 
| 
 | 
  1360  
 | 
| 
 | 
  1361     my $record=0;
 | 
| 
 | 
  1362     my $nb_links=0;
 | 
| 
 | 
  1363     my $warn=10000;
 | 
| 
 | 
  1364     
 | 
| 
 | 
  1365     my $sens_ratio_threshold=0.6;
 | 
| 
 | 
  1366     
 | 
| 
 | 
  1367     print LOG "\# Filtering procedure...\n";
 | 
| 
 | 
  1368     print LOG "\# Number of pairs and strand filtering...\n";
 | 
| 
 | 
  1369     print LOG "-- file=$links_file\n";
 | 
| 
 | 
  1370     print LOG "-- nb_pairs_threshold=$pairs_threshold, strand_filtering=".(($strand_filtering)? "yes":"no").
 | 
| 
 | 
  1371 	 ", chromosomes=".(($chromosomes)? "$chromosomes":"all")."\n";
 | 
| 
 | 
  1372     
 | 
| 
 | 
  1373     
 | 
| 
 | 
  1374     
 | 
| 
 | 
  1375     my $fh = new FileHandle;
 | 
| 
 | 
  1376     my $fh2 = new FileHandle;
 | 
| 
 | 
  1377     
 | 
| 
 | 
  1378     $fh->open("<$links_file") or die "$0: can't open $links_file :$!\n";
 | 
| 
 | 
  1379     $fh2->open(">$flinks_file") or die "$0: can't write in the output: $flinks_file :$!\n";
 | 
| 
 | 
  1380 
 | 
| 
 | 
  1381     while(<$fh>){
 | 
| 
 | 
  1382 	
 | 
| 
 | 
  1383 	my @t=split;	#for each link
 | 
| 
 | 
  1384 	my $is_good=1;
 | 
| 
 | 
  1385 	$record++;
 | 
| 
 | 
  1386 	
 | 
| 
 | 
  1387 	
 | 
| 
 | 
  1388 	if($chromosomes){
 | 
| 
 | 
  1389 	    
 | 
| 
 | 
  1390 	    my ($chr1,$chr2)=($chrID->{$t[0]},$chrID->{$t[3]});
 | 
| 
 | 
  1391 	    
 | 
| 
 | 
  1392 	    if(!$exclude){
 | 
| 
 | 
  1393 		$is_good=(exists $chrs{$chr1} && exists $chrs{$chr2})? 1:0;
 | 
| 
 | 
  1394 		$is_good=(exists $chrs1{$chr1} && exists $chrs2{$chr2})? 1:0 if(!$is_good);
 | 
| 
 | 
  1395 		$is_good=($nb_chrs==1 && (exists $chrs1{$chr1} || exists $chrs2{$chr2}))? 1:0 if(!$is_good);
 | 
| 
 | 
  1396 	    }else{
 | 
| 
 | 
  1397 		$is_good=(exists $chrs{$chr1} || exists $chrs{$chr2})? 0:1;
 | 
| 
 | 
  1398 		$is_good=(exists $chrs1{$chr1} || exists $chrs2{$chr2})? 0:1 if($is_good);
 | 
| 
 | 
  1399 	    }
 | 
| 
 | 
  1400 	}
 | 
| 
 | 
  1401 	
 | 
| 
 | 
  1402 	$is_good = ($is_good && $t[6] >= $pairs_threshold)? 1 :0;		#filtering according the number of pairs
 | 
| 
 | 
  1403 	if($is_good && $strand_filtering){					#if filtering according the strand sense
 | 
| 
 | 
  1404 	    
 | 
| 
 | 
  1405 	    my @mates=split(/,/,$t[7]);						#get the concordant pairs in the strand sense
 | 
| 
 | 
  1406 	    my @strands1=split(/,/,$t[8]);
 | 
| 
 | 
  1407 	    my @strands2=split(/,/,$t[9]);
 | 
| 
 | 
  1408 	    
 | 
| 
 | 
  1409 	    my %mate_class=( 'FF' => 0, 'RR' => 0, 'FR' => 0,  'RF' => 0);
 | 
| 
 | 
  1410 	    
 | 
| 
 | 
  1411 	    my %mate_reverse=( 'FF' => 'RR', 'RR' => 'FF',			#group1: FF,RR
 | 
| 
 | 
  1412 		       'FR' => 'RF', 'RF' => 'FR');				#group2: FR,RF
 | 
| 
 | 
  1413 	    
 | 
| 
 | 
  1414 	    my %mate_class2=( $mate_sense=>"NORMAL_SENSE", inverseSense($mate_sense)=>"NORMAL_SENSE",
 | 
| 
 | 
  1415 		    substr($mate_sense,0,1).inverseSense(substr($mate_sense,1,1))=>"REVERSE_SENSE",
 | 
| 
 | 
  1416 		    inverseSense(substr($mate_sense,0,1)).substr($mate_sense,1,1)=>"REVERSE_SENSE");
 | 
| 
 | 
  1417 	    
 | 
| 
 | 
  1418 	    if($t[6] == 1){
 | 
| 
 | 
  1419 		
 | 
| 
 | 
  1420 		push(@t,$mate_class2{$strands1[0].$strands2[0]},"1/1",1,1);
 | 
| 
 | 
  1421 		
 | 
| 
 | 
  1422 	    }else{
 | 
| 
 | 
  1423 		
 | 
| 
 | 
  1424 		tie (my %class,'Tie::IxHash');
 | 
| 
 | 
  1425 		my $split;
 | 
| 
 | 
  1426 		
 | 
| 
 | 
  1427 		foreach my $i (0..$#mates){
 | 
| 
 | 
  1428 		    $mate_class{$strands1[$i].$strands2[$i]}++;		#get the over-represented group
 | 
| 
 | 
  1429 		}
 | 
| 
 | 
  1430 		
 | 
| 
 | 
  1431 		my $nb_same_sens_class=$mate_class{FF}+$mate_class{RR};
 | 
| 
 | 
  1432 		my $nb_diff_sens_class=$mate_class{FR}+$mate_class{RF};
 | 
| 
 | 
  1433 		my $sens_ratio=max($nb_same_sens_class,$nb_diff_sens_class)/($nb_same_sens_class+$nb_diff_sens_class);
 | 
| 
 | 
  1434 		
 | 
| 
 | 
  1435 		if($sens_ratio < $sens_ratio_threshold){
 | 
| 
 | 
  1436 		    %class=(1=>'FF', 2=>'FR');
 | 
| 
 | 
  1437 		    $split=1;
 | 
| 
 | 
  1438 		}else{
 | 
| 
 | 
  1439 		    $class{1}=($nb_same_sens_class >  $nb_diff_sens_class)? 'FF':'FR';	#if yes get the concerned class
 | 
| 
 | 
  1440 		    $split=0;
 | 
| 
 | 
  1441 		}
 | 
| 
 | 
  1442 		
 | 
| 
 | 
  1443 		$is_good=getConsistentSenseLinks(\@t,\@mates,\@strands1,\@strands2,$tag_length,$mate_sense,\%mate_reverse,\%mate_class2,\%class,$split,$pairs_threshold);
 | 
| 
 | 
  1444 	    }
 | 
| 
 | 
  1445 	}
 | 
| 
 | 
  1446 	
 | 
| 
 | 
  1447 	if($is_good){								#PRINT
 | 
| 
 | 
  1448 	    
 | 
| 
 | 
  1449 	    my $nb=scalar @t;
 | 
| 
 | 
  1450 	    if($nb > 20){
 | 
| 
 | 
  1451 		my @t2=splice(@t,0,20);
 | 
| 
 | 
  1452 		print $fh2 join("\t",@t2)."\n";
 | 
| 
 | 
  1453 		$nb_links++;
 | 
| 
 | 
  1454 	    }
 | 
| 
 | 
  1455 	    $nb_links++;
 | 
| 
 | 
  1456 	    print $fh2 join("\t",@t)."\n";
 | 
| 
 | 
  1457 	}
 | 
| 
 | 
  1458 	
 | 
| 
 | 
  1459 	if($record>=$warn){
 | 
| 
 | 
  1460 	   print LOG "-- $fchr : $warn links analysed - $nb_links links kept\n";
 | 
| 
 | 
  1461 	    $warn+=10000;
 | 
| 
 | 
  1462 	}
 | 
| 
 | 
  1463     }
 | 
| 
 | 
  1464     $fh->close;
 | 
| 
 | 
  1465     $fh2->close;
 | 
| 
 | 
  1466     
 | 
| 
 | 
  1467     print LOG "-- $fchr : No links have been found with the selected filtering parameters\n" if(!$nb_links);
 | 
| 
 | 
  1468     
 | 
| 
 | 
  1469     print LOG "-- $fchr : Total : $record links analysed - $nb_links links kept\n";
 | 
| 
 | 
  1470     
 | 
| 
 | 
  1471 
 | 
| 
 | 
  1472 }
 | 
| 
 | 
  1473 #------------------------------------------------------------------------------#
 | 
| 
 | 
  1474 #------------------------------------------------------------------------------#
 | 
| 
 | 
  1475 sub getConsistentSenseLinks{
 | 
| 
 | 
  1476     
 | 
| 
 | 
  1477     my ($t,$mates,$strands1,$strands2,$tag_length,$mate_sense, $mate_reverse,$mate_class2, $class, $split,$thres)=@_;
 | 
| 
 | 
  1478     
 | 
| 
 | 
  1479     my $npairs=scalar @$mates;
 | 
| 
 | 
  1480 
 | 
| 
 | 
  1481     my @ends_order1 = split (/,/,$$t[10]);
 | 
| 
 | 
  1482     my @ends_order2 = split (/,/,$$t[11]);
 | 
| 
 | 
  1483     my @order1 = split (/,/,$$t[12]);
 | 
| 
 | 
  1484     my @order2 = split (/,/,$$t[13]);
 | 
| 
 | 
  1485     my @positions1 = split (/,/,$$t[14]);
 | 
| 
 | 
  1486     my @positions2 = split (/,/,$$t[15]);
 | 
| 
 | 
  1487     
 | 
| 
 | 
  1488     my @newlink;
 | 
| 
 | 
  1489 
 | 
| 
 | 
  1490     foreach my $ind (keys %{$class} ){
 | 
| 
 | 
  1491 	
 | 
| 
 | 
  1492 	tie (my %flink,'Tie::IxHash');
 | 
| 
 | 
  1493 	my @orders2remove=();
 | 
| 
 | 
  1494 	
 | 
| 
 | 
  1495 	foreach my $i (0..$#{$mates}){					#get the pairs belonging the over-represented group
 | 
| 
 | 
  1496 	
 | 
| 
 | 
  1497 	    if((($$strands1[$i].$$strands2[$i]) eq $$class{$ind}) || (($$strands1[$i].$$strands2[$i]) eq $$mate_reverse{$$class{$ind}})){
 | 
| 
 | 
  1498 		push(@{$flink{mates}},$$mates[$i]);
 | 
| 
 | 
  1499 		push(@{$flink{strands1}},$$strands1[$i]);
 | 
| 
 | 
  1500 		push(@{$flink{strands2}},$$strands2[$i]);
 | 
| 
 | 
  1501 		push(@{$flink{ends_order1}},$ends_order1[$i]);
 | 
| 
 | 
  1502 		push(@{$flink{ends_order2}},$ends_order2[$i]);
 | 
| 
 | 
  1503 		push(@{$flink{positions1}},$positions1[$i]);
 | 
| 
 | 
  1504 		push(@{$flink{positions2}},$positions2[$i]);
 | 
| 
 | 
  1505 			    
 | 
| 
 | 
  1506 	    }else{
 | 
| 
 | 
  1507 		push(@orders2remove,$order1[$i]);
 | 
| 
 | 
  1508 	    }
 | 
| 
 | 
  1509 	}
 | 
| 
 | 
  1510 	
 | 
| 
 | 
  1511 	@{$flink{order1}}=();
 | 
| 
 | 
  1512 	@{$flink{order2}}=();
 | 
| 
 | 
  1513 	if(scalar @orders2remove > 0){
 | 
| 
 | 
  1514 	    getNewOrders(\@order1,\@order2,\@orders2remove,$flink{order1},$flink{order2})
 | 
| 
 | 
  1515 	}else{
 | 
| 
 | 
  1516 	    @{$flink{order1}}=@order1;
 | 
| 
 | 
  1517 	    @{$flink{order2}}=@order2;
 | 
| 
 | 
  1518 	}
 | 
| 
 | 
  1519 	
 | 
| 
 | 
  1520 	my @ends1; getEnds(\@ends1,$flink{positions1},$flink{strands1},$flink{ends_order1},$tag_length);
 | 
| 
 | 
  1521 	my @ends2; getEnds(\@ends2,$flink{positions2},$flink{strands2},$flink{ends_order2},$tag_length);
 | 
| 
 | 
  1522 	
 | 
| 
 | 
  1523 	my $fnpairs=scalar @{$flink{mates}};
 | 
| 
 | 
  1524 	my $strand_filtering_ratio=$fnpairs."/".$npairs;
 | 
| 
 | 
  1525 	my $real_ratio=$fnpairs/$npairs;
 | 
| 
 | 
  1526 	
 | 
| 
 | 
  1527 	if($fnpairs>=$thres){							#filtering according the number of pairs
 | 
| 
 | 
  1528 	    
 | 
| 
 | 
  1529 	    push(@newlink,
 | 
| 
 | 
  1530 		$$t[0],
 | 
| 
 | 
  1531 		min(min(@{$flink{positions1}}),min(@ends1)),
 | 
| 
 | 
  1532 		max(max(@{$flink{positions1}}),max(@ends1)),
 | 
| 
 | 
  1533 		$$t[3],
 | 
| 
 | 
  1534 		min(min(@{$flink{positions2}}),min(@ends2)),
 | 
| 
 | 
  1535 		max(max(@{$flink{positions2}}),max(@ends2)),
 | 
| 
 | 
  1536 		$fnpairs,
 | 
| 
 | 
  1537 		join(",",@{$flink{mates}}),
 | 
| 
 | 
  1538 		join(",",@{$flink{strands1}}),
 | 
| 
 | 
  1539 		join(",",@{$flink{strands2}}),
 | 
| 
 | 
  1540 		join(",",@{$flink{ends_order1}}),
 | 
| 
 | 
  1541 		join(",",@{$flink{ends_order2}}),
 | 
| 
 | 
  1542 		join(",",@{$flink{order1}}),
 | 
| 
 | 
  1543 		join(",",@{$flink{order2}}),
 | 
| 
 | 
  1544 		join(",",@{$flink{positions1}}),
 | 
| 
 | 
  1545 		join(",",@{$flink{positions2}}),
 | 
| 
 | 
  1546 		$$mate_class2{${$flink{strands1}}[0].${$flink{strands2}}[0]},
 | 
| 
 | 
  1547 		$strand_filtering_ratio,
 | 
| 
 | 
  1548 		$real_ratio,
 | 
| 
 | 
  1549 		$npairs
 | 
| 
 | 
  1550 	    );
 | 
| 
 | 
  1551 	}
 | 
| 
 | 
  1552     }
 | 
| 
 | 
  1553     
 | 
| 
 | 
  1554     if (grep {defined($_)} @newlink) {
 | 
| 
 | 
  1555 	@$t=@newlink;
 | 
| 
 | 
  1556 	return 1
 | 
| 
 | 
  1557     }
 | 
| 
 | 
  1558     return 0;
 | 
| 
 | 
  1559     
 | 
| 
 | 
  1560 }
 | 
| 
 | 
  1561 #------------------------------------------------------------------------------#
 | 
| 
 | 
  1562 #------------------------------------------------------------------------------#
 | 
| 
 | 
  1563 sub getNewOrders{
 | 
| 
 | 
  1564     
 | 
| 
 | 
  1565     my($tab1,$tab2,$list,$newtab1,$newtab2)=@_;
 | 
| 
 | 
  1566     my $j=1;
 | 
| 
 | 
  1567     my $k=1;
 | 
| 
 | 
  1568     for my $i (0..$#{$tab2}){
 | 
| 
 | 
  1569 	my $c=0;
 | 
| 
 | 
  1570 	for my $j (0..$#{$list}){
 | 
| 
 | 
  1571 	    $c++ if(${$list}[$j] < ${$tab2}[$i]);
 | 
| 
 | 
  1572 	    if(${$list}[$j] == ${$tab2}[$i]){
 | 
| 
 | 
  1573 		$c=-1; last;
 | 
| 
 | 
  1574 	    }
 | 
| 
 | 
  1575 	}
 | 
| 
 | 
  1576 	if($c!=-1){
 | 
| 
 | 
  1577 	    push(@{$newtab2}, ${$tab2}[$i]-$c);
 | 
| 
 | 
  1578 	    push(@{$newtab1}, $k);
 | 
| 
 | 
  1579 	    $k++;
 | 
| 
 | 
  1580 	}
 | 
| 
 | 
  1581     }
 | 
| 
 | 
  1582 }
 | 
| 
 | 
  1583 
 | 
| 
 | 
  1584 #------------------------------------------------------------------------------#
 | 
| 
 | 
  1585 #------------------------------------------------------------------------------#
 | 
| 
 | 
  1586 #Filtering of the links using their order
 | 
| 
 | 
  1587 sub orderFiltering {
 | 
| 
 | 
  1588 
 | 
| 
 | 
  1589     my ($chr,$chrID,$nb_pairs_threshold,$nb_pairs_order_threshold,$mu,$sigma,$mate_sense,$tag_length,$links_file,$flinks_file)=@_;
 | 
| 
 | 
  1590     
 | 
| 
 | 
  1591     my @sfile=split(/\./,$links_file);
 | 
| 
 | 
  1592     my $fchr=$sfile[$#sfile-2];
 | 
| 
 | 
  1593     
 | 
| 
 | 
  1594     
 | 
| 
 | 
  1595     my $diff_sense_ends=(($mate_sense eq "FR") || ($mate_sense eq "RF"))? 1:0; 
 | 
| 
 | 
  1596     
 | 
| 
 | 
  1597     my $record=0;
 | 
| 
 | 
  1598     my $warn=10000;
 | 
| 
 | 
  1599     my $nb_links=0;
 | 
| 
 | 
  1600     
 | 
| 
 | 
  1601     my $quant05 = 1.644854;
 | 
| 
 | 
  1602     my $quant001 = 3.090232;
 | 
| 
 | 
  1603     my $alphaDist = $quant05 * 2 * $sigma;
 | 
| 
 | 
  1604     my $maxFragmentLength = &floor($quant001 * $sigma + $mu);
 | 
| 
 | 
  1605     
 | 
| 
 | 
  1606     print LOG "\# Filtering by order...\n";
 | 
| 
 | 
  1607     print LOG "-- mu length=$mu, sigma length=$sigma, nb pairs order threshold=$nb_pairs_order_threshold\n";
 | 
| 
 | 
  1608     print LOG "-- distance between comparable pairs was set to $alphaDist\n";
 | 
| 
 | 
  1609     print LOG "-- maximal fragment length was set to $maxFragmentLength\n";
 | 
| 
 | 
  1610 
 | 
| 
 | 
  1611     
 | 
| 
 | 
  1612     my $fh = new FileHandle;
 | 
| 
 | 
  1613     my $fh2 = new FileHandle;
 | 
| 
 | 
  1614     
 | 
| 
 | 
  1615     $fh->open("<$links_file") or die "$0: can't open $links_file :$!\n";
 | 
| 
 | 
  1616     $fh2->open(">$flinks_file") or die "$0: can't write in the output: $flinks_file :$!\n";
 | 
| 
 | 
  1617     
 | 
| 
 | 
  1618     while(<$fh>){
 | 
| 
 | 
  1619 	
 | 
| 
 | 
  1620 	$record++; 
 | 
| 
 | 
  1621 	my @t = split; 
 | 
| 
 | 
  1622 	my ($chr1,$chr2,$mates_list)=@t[0,3,7];
 | 
| 
 | 
  1623 	my @pairs=split(",",$mates_list);
 | 
| 
 | 
  1624 	($chr1,$chr2) = ($chrID->{$chr1},$chrID->{$chr2});
 | 
| 
 | 
  1625 	my ($coord_start_chr1,$coord_end_chr1,$coord_start_chr2,$coord_end_chr2) = @t[1,2,4,5];
 | 
| 
 | 
  1626 	my $numberOfPairs = $t[6];
 | 
| 
 | 
  1627 	my @strand1 = split (/,/,$t[8]);
 | 
| 
 | 
  1628 	my @strand2 = split (/,/,$t[9]);
 | 
| 
 | 
  1629 	my @ends_order1 = split (/,/,$t[10]);
 | 
| 
 | 
  1630 	my @ends_order2 = split (/,/,$t[11]);
 | 
| 
 | 
  1631 	my @order1 = split (/,/,$t[12]);
 | 
| 
 | 
  1632 	my @order2 = split (/,/,$t[13]);
 | 
| 
 | 
  1633 	my @positions1 = split (/,/,$t[14]); 
 | 
| 
 | 
  1634 	my @positions2 = split (/,/,$t[15]);
 | 
| 
 | 
  1635 	my @ends1; getEnds(\@ends1,\@positions1,\@strand1,\@ends_order1,$tag_length);
 | 
| 
 | 
  1636 	my @ends2; getEnds(\@ends2,\@positions2,\@strand2,\@ends_order2,$tag_length);
 | 
| 
 | 
  1637 	my $clusterCoordinates_chr1;
 | 
| 
 | 
  1638 	my $clusterCoordinates_chr2;
 | 
| 
 | 
  1639 	my $reads_left = 0;
 | 
| 
 | 
  1640 	
 | 
| 
 | 
  1641 	my $ifRenv = $t[16];
 | 
| 
 | 
  1642 	my $strand_ratio_filtering=$t[17]; 
 | 
| 
 | 
  1643 	
 | 
| 
 | 
  1644 	#kind of strand filtering. For example, will keep only FFF-RRR from a link FFRF-RRRF if <F-R> orientation is correct
 | 
| 
 | 
  1645 	my ($singleBreakpoint, %badInFRSense) = findBadInFRSenseSOLiDSolexa(\@strand1,\@strand2,\@ends_order1,\@ends_order2,\@order1,\@order2,$mate_sense);
 | 
| 
 | 
  1646 										#find pairs type F-RRRR or FFFF-R in the case if <R-F> orientation is correct
 | 
| 
 | 
  1647 										#These pairs are annotated as BED pairs forever! They won't be recycled!
 | 
| 
 | 
  1648 	my $table;
 | 
| 
 | 
  1649 	for my $i (0..$numberOfPairs-1) {					#fill the table with non adequate pairs: pairID	numberOfNonAdPairs nonAdPairIDs
 | 
| 
 | 
  1650 	    my $nonAdeq = 0;
 | 
| 
 | 
  1651 	    for my $j (0..$i-1) {
 | 
| 
 | 
  1652 		if (exists($table->{$j}->{$i})) {		    
 | 
| 
 | 
  1653 		    $nonAdeq++;
 | 
| 
 | 
  1654 		    $table->{$i}->{$j} = 1;		    
 | 
| 
 | 
  1655 		}
 | 
| 
 | 
  1656 	    }
 | 
| 
 | 
  1657 	    for my $j ($i+1..$numberOfPairs-1) {
 | 
| 
 | 
  1658 		if ($positions1[$j]-$positions1[$i]>$alphaDist) {		    
 | 
| 
 | 
  1659 		    if (&reversed ($i,$j,$ifRenv,\@positions2)) {
 | 
| 
 | 
  1660 			$nonAdeq++;
 | 
| 
 | 
  1661 			$table->{$i}->{$j} = 1;
 | 
| 
 | 
  1662 		    }
 | 
| 
 | 
  1663 		}
 | 
| 
 | 
  1664 	    }
 | 
| 
 | 
  1665 	    $table->{$i}->{nonAdeq} = $nonAdeq;   	    
 | 
| 
 | 
  1666 	}	
 | 
| 
 | 
  1667 							
 | 
| 
 | 
  1668 	for my $bad (keys %badInFRSense) { #remove pairs type F-RRRR or FFFF-R in the case of <R-F> orientation
 | 
| 
 | 
  1669 	    &remove($bad,$table);	    
 | 
| 
 | 
  1670 	}	
 | 
| 
 | 
  1671 	
 | 
| 
 | 
  1672 	my @falseReads;	
 | 
| 
 | 
  1673 							#RRRR-F -> RRRR or R-FFFF -> FFFF	
 | 
| 
 | 
  1674 	@falseReads = findBadInRFSenseSOLiDSolexa(\@strand1,\@ends_order1,$mate_sense, keys %{$table});
 | 
| 
 | 
  1675 										#these pairs will be recycled later as $secondTable
 | 
| 
 | 
  1676 	for my $bad (@falseReads) {
 | 
| 
 | 
  1677 	    &remove($bad,$table);
 | 
| 
 | 
  1678 	}			
 | 
| 
 | 
  1679 		
 | 
| 
 | 
  1680 	my $bad = &check($table); 
 | 
| 
 | 
  1681 	while ($bad ne "OK") {							#clear the table to reject non adequate pairs in the sense of ORDER
 | 
| 
 | 
  1682 	   # push (@falseReads, $bad);  remove completely!!!
 | 
| 
 | 
  1683 	    &remove($bad,$table);  
 | 
| 
 | 
  1684 	    $bad = &check($table);	    
 | 
| 
 | 
  1685 	}
 | 
| 
 | 
  1686 	
 | 
| 
 | 
  1687 	$reads_left = scalar keys %{$table}; 
 | 
| 
 | 
  1688 	my $coord_start_chr1_cluster1 = min(min(@positions1[sort {$a<=>$b} keys %{$table}]),min(@ends1[sort {$a<=>$b} keys %{$table}]));
 | 
| 
 | 
  1689 	my $coord_end_chr1_cluster1 = max(max(@positions1[sort {$a<=>$b} keys %{$table}]),max(@ends1[sort {$a<=>$b} keys %{$table}]));   
 | 
| 
 | 
  1690 	my $coord_start_chr2_cluster1 = min(min(@positions2[sort {$a<=>$b} keys %{$table}]),min(@ends2[sort {$a<=>$b} keys %{$table}]));
 | 
| 
 | 
  1691 	my $coord_end_chr2_cluster1 = max(max(@positions2[sort {$a<=>$b} keys %{$table}]),max(@ends2[sort {$a<=>$b} keys %{$table}]));  	
 | 
| 
 | 
  1692 	
 | 
| 
 | 
  1693 	$clusterCoordinates_chr1 = '('.$coord_start_chr1_cluster1.','.$coord_end_chr1_cluster1.')';
 | 
| 
 | 
  1694 	$clusterCoordinates_chr2 = '('.$coord_start_chr2_cluster1.','.$coord_end_chr2_cluster1.')';
 | 
| 
 | 
  1695 	
 | 
| 
 | 
  1696 	my $ifBalanced = 'UNBAL';
 | 
| 
 | 
  1697 	my $secondTable;
 | 
| 
 | 
  1698 	my $clusterCoordinates;
 | 
| 
 | 
  1699 	my ($break_pont_chr1,$break_pont_chr2);
 | 
| 
 | 
  1700 	
 | 
| 
 | 
  1701 	my $signatureType="";
 | 
| 
 | 
  1702 	
 | 
| 
 | 
  1703 	my $maxCoord1 =$chr->{$chr1}->{length};
 | 
| 
 | 
  1704 	my $maxCoord2 =$chr->{$chr2}->{length};
 | 
| 
 | 
  1705 	
 | 
| 
 | 
  1706 	if (scalar @falseReads) {
 | 
| 
 | 
  1707 	    @falseReads = sort @falseReads; 
 | 
| 
 | 
  1708 	    #now delete FRFR choosing the majority
 | 
| 
 | 
  1709 	    my @newfalseReads;	   							#find and remove pairs type RRRR-F or R-FFFF	
 | 
| 
 | 
  1710 	    @newfalseReads = findBadInRFSenseSOLiDSolexa(\@strand1,\@ends_order1,$mate_sense,@falseReads);	     #these @newfalseReads  won't be recycled
 | 
| 
 | 
  1711 	    my %hashTmp;
 | 
| 
 | 
  1712 	    for my $count1 (0..scalar(@falseReads)-1) {
 | 
| 
 | 
  1713 		my $i = $falseReads[$count1];
 | 
| 
 | 
  1714 		$hashTmp{$i} = 1;
 | 
| 
 | 
  1715 		for my $bad (@newfalseReads) {
 | 
| 
 | 
  1716 		    if ($bad == $i) {
 | 
| 
 | 
  1717 			delete $hashTmp{$i};
 | 
| 
 | 
  1718 			next;
 | 
| 
 | 
  1719 		    }
 | 
| 
 | 
  1720 		}
 | 
| 
 | 
  1721 	    }
 | 
| 
 | 
  1722 	    @falseReads = sort keys %hashTmp;  #what is left
 | 
| 
 | 
  1723 	    for my $count1 (0..scalar(@falseReads)-1) {				#fill the table for reads which were previously rejected  
 | 
| 
 | 
  1724 		my $nonAdeq = 0;
 | 
| 
 | 
  1725 		my $i = $falseReads[$count1];
 | 
| 
 | 
  1726 
 | 
| 
 | 
  1727 		for my $count2 (0..$count1-1) {
 | 
| 
 | 
  1728 		    my $j = $falseReads[$count2];
 | 
| 
 | 
  1729 		    if (exists($secondTable->{$j}->{$i})) {		    
 | 
| 
 | 
  1730 			$nonAdeq++;
 | 
| 
 | 
  1731 			$secondTable->{$i}->{$j} = 1;		    
 | 
| 
 | 
  1732 		    }
 | 
| 
 | 
  1733 		}
 | 
| 
 | 
  1734 		for my $count2 ($count1+1..scalar(@falseReads)-1) {
 | 
| 
 | 
  1735 		    my $j = $falseReads[$count2];
 | 
| 
 | 
  1736 		    if ($positions1[$j]-$positions1[$i]>$alphaDist) {
 | 
| 
 | 
  1737 			if (&reversed ($i,$j,$ifRenv,\@positions2)) {
 | 
| 
 | 
  1738 			    $nonAdeq++;
 | 
| 
 | 
  1739 			    $secondTable->{$i}->{$j} = 1;
 | 
| 
 | 
  1740 			}
 | 
| 
 | 
  1741 		    }
 | 
| 
 | 
  1742 		}
 | 
| 
 | 
  1743 		$secondTable->{$i}->{nonAdeq} = $nonAdeq;
 | 
| 
 | 
  1744 	    }
 | 
| 
 | 
  1745 	    
 | 
| 
 | 
  1746 	    my @falseReads2;
 | 
| 
 | 
  1747 	    my $bad = &check($secondTable);
 | 
| 
 | 
  1748 	    while ($bad ne "OK") {						#clear the table to reject non adequate pairs
 | 
| 
 | 
  1749 		push (@falseReads2, $bad);
 | 
| 
 | 
  1750 		&remove($bad,$secondTable);
 | 
| 
 | 
  1751 		$bad = &check($secondTable);
 | 
| 
 | 
  1752 	    }
 | 
| 
 | 
  1753 	    if (scalar keys %{$secondTable} >= $nb_pairs_order_threshold) {
 | 
| 
 | 
  1754 		my $coord_start_chr1_cluster2 = min(min(@positions1[sort {$a<=>$b} keys %{$secondTable}]),min(@ends1[sort {$a<=>$b} keys %{$secondTable}]));
 | 
| 
 | 
  1755 		my $coord_end_chr1_cluster2 = max(max(@positions1[sort {$a<=>$b} keys %{$secondTable}]),max(@ends1[sort {$a<=>$b} keys %{$secondTable}]));   
 | 
| 
 | 
  1756 		my $coord_start_chr2_cluster2 = min(min(@positions2[sort {$a<=>$b} keys %{$secondTable}]),min(@ends2[sort {$a<=>$b} keys %{$secondTable}]));
 | 
| 
 | 
  1757 		my $coord_end_chr2_cluster2 = max(max(@positions2[sort {$a<=>$b} keys %{$secondTable}]),max(@ends2[sort {$a<=>$b} keys %{$secondTable}]));   		 
 | 
| 
 | 
  1758 		
 | 
| 
 | 
  1759 		$ifBalanced = 'BAL';
 | 
| 
 | 
  1760 		
 | 
| 
 | 
  1761 		if ($ifBalanced eq 'BAL') {
 | 
| 
 | 
  1762 		    
 | 
| 
 | 
  1763 		    if (scalar keys %{$table} < $nb_pairs_order_threshold) {
 | 
| 
 | 
  1764 			$ifBalanced = 'UNBAL';					#kill cluster 1!
 | 
| 
 | 
  1765 			($table,$secondTable)=($secondTable,$table);		#this means that one needs to exchange cluster1 with cluster2
 | 
| 
 | 
  1766 			$reads_left = scalar keys %{$table};
 | 
| 
 | 
  1767 			$coord_start_chr1_cluster1 = $coord_start_chr1_cluster2;
 | 
| 
 | 
  1768 			$coord_end_chr1_cluster1 = $coord_end_chr1_cluster2;   
 | 
| 
 | 
  1769 			$coord_start_chr2_cluster1 = $coord_start_chr2_cluster2;
 | 
| 
 | 
  1770 			$coord_end_chr2_cluster1 = $coord_end_chr2_cluster2;
 | 
| 
 | 
  1771 			$clusterCoordinates_chr1 = '('.$coord_start_chr1_cluster1.','.$coord_end_chr1_cluster1.')';
 | 
| 
 | 
  1772 			$clusterCoordinates_chr2 = '('.$coord_start_chr2_cluster1.','.$coord_end_chr2_cluster1.')';
 | 
| 
 | 
  1773 			
 | 
| 
 | 
  1774 		    } else {
 | 
| 
 | 
  1775 			
 | 
| 
 | 
  1776 			$reads_left += scalar keys %{$secondTable};
 | 
| 
 | 
  1777 			next if ($reads_left < $nb_pairs_threshold);
 | 
| 
 | 
  1778 			
 | 
| 
 | 
  1779 			if ($coord_end_chr1_cluster2 < $coord_start_chr1_cluster1) {
 | 
| 
 | 
  1780 			    ($table,$secondTable)=($secondTable,$table);	 #this means that one needs to exchange cluster1 with cluster2
 | 
| 
 | 
  1781 			    
 | 
| 
 | 
  1782 			    ($coord_start_chr1_cluster1,$coord_start_chr1_cluster2) = ($coord_start_chr1_cluster2,$coord_start_chr1_cluster1);
 | 
| 
 | 
  1783 			    ($coord_end_chr1_cluster1,$coord_end_chr1_cluster2)=($coord_end_chr1_cluster2,$coord_end_chr1_cluster1);   
 | 
| 
 | 
  1784 			    ($coord_start_chr2_cluster1,$coord_start_chr2_cluster2)=($coord_start_chr2_cluster2,$coord_start_chr2_cluster1);
 | 
| 
 | 
  1785 			    ($coord_end_chr2_cluster1 , $coord_end_chr2_cluster2)=($coord_end_chr2_cluster2 , $coord_end_chr2_cluster1);
 | 
| 
 | 
  1786 			    
 | 
| 
 | 
  1787 			    $clusterCoordinates_chr1 = '('.$coord_start_chr1_cluster1.','.$coord_end_chr1_cluster1.'),'.$clusterCoordinates_chr1;
 | 
| 
 | 
  1788 			    $clusterCoordinates_chr2 = '('.$coord_start_chr2_cluster1.','.$coord_end_chr2_cluster1.'),'.$clusterCoordinates_chr2;			
 | 
| 
 | 
  1789 			}else {
 | 
| 
 | 
  1790 			    $clusterCoordinates_chr1 .= ',('.$coord_start_chr1_cluster2.','.$coord_end_chr1_cluster2.')';
 | 
| 
 | 
  1791 			    $clusterCoordinates_chr2 .= ',('.$coord_start_chr2_cluster2.','.$coord_end_chr2_cluster2.')';			
 | 
| 
 | 
  1792 			}
 | 
| 
 | 
  1793 			$coord_start_chr1 = min($coord_start_chr1_cluster1,$coord_start_chr1_cluster2);
 | 
| 
 | 
  1794 			$coord_end_chr1 = max($coord_end_chr1_cluster1,$coord_end_chr1_cluster2);
 | 
| 
 | 
  1795 			$coord_start_chr2 =  min($coord_start_chr2_cluster1,$coord_start_chr2_cluster2);
 | 
| 
 | 
  1796 			$coord_end_chr2 = max($coord_end_chr2_cluster1,$coord_end_chr2_cluster2);
 | 
| 
 | 
  1797 			#to calculate breakpoints one need to take into account read orientation in claster..
 | 
| 
 | 
  1798 			my $leftLetterOk = substr($mate_sense, 0, 1);  #R
 | 
| 
 | 
  1799 			my $rightLetterOk = substr($mate_sense, 1, 1); #F
 | 
| 
 | 
  1800 			
 | 
| 
 | 
  1801 			
 | 
| 
 | 
  1802 			my @index1 = keys %{$table};
 | 
| 
 | 
  1803 			my @index2 = keys %{$secondTable};
 | 
| 
 | 
  1804 			
 | 
| 
 | 
  1805 			my (@generalStrand1,@generalStrand2) = 0;
 | 
| 
 | 
  1806 			
 | 
| 
 | 
  1807 			if ($leftLetterOk eq $rightLetterOk) { #SOLID mate-pairs
 | 
| 
 | 
  1808 			    $leftLetterOk = 'R';
 | 
| 
 | 
  1809 			    $rightLetterOk = 'F';
 | 
| 
 | 
  1810 			    @generalStrand1 = translateSolidToRF(\@strand1,\@ends_order1);	
 | 
| 
 | 
  1811 			    @generalStrand2 = translateSolidToRF(\@strand2,\@ends_order2);
 | 
| 
 | 
  1812 			} else {
 | 
| 
 | 
  1813 			    @generalStrand1 = @strand1;
 | 
| 
 | 
  1814 			    @generalStrand2 = @strand2; # TODO check if it is correct
 | 
| 
 | 
  1815 			}
 | 
| 
 | 
  1816 			if ($generalStrand1[$index1[0]] eq $leftLetterOk && $generalStrand1[$index2[0]] eq $rightLetterOk) { #(R,F)
 | 
| 
 | 
  1817 			    $break_pont_chr1 = '('.$coord_end_chr1_cluster1.','.$coord_start_chr1_cluster2.')';
 | 
| 
 | 
  1818 			    
 | 
| 
 | 
  1819 			    if ($generalStrand2[$index1[0]] eq $rightLetterOk && $generalStrand2[$index2[0]] eq $leftLetterOk) {
 | 
| 
 | 
  1820 				if ($coord_end_chr2_cluster1 >= $coord_end_chr2_cluster2) {
 | 
| 
 | 
  1821 				    $break_pont_chr2 = '('.$coord_end_chr2_cluster2.','.$coord_start_chr2_cluster1.')';
 | 
| 
 | 
  1822 				    $signatureType = "TRANSLOC";
 | 
| 
 | 
  1823 				} else {
 | 
| 
 | 
  1824 				    $break_pont_chr2 = '('.max(($coord_end_chr2_cluster1-$maxFragmentLength),1).','.$coord_start_chr2_cluster1.')';	
 | 
| 
 | 
  1825 				    $break_pont_chr2 .= ',('.$coord_end_chr2_cluster2.','.min(($coord_start_chr2_cluster2+$maxFragmentLength),$maxCoord2).')';
 | 
| 
 | 
  1826 				    $signatureType = "INS_FRAGMT";
 | 
| 
 | 
  1827 				}		    
 | 
| 
 | 
  1828 				
 | 
| 
 | 
  1829 			    } elsif ($generalStrand2[$index1[0]] eq $leftLetterOk && $generalStrand2[$index2[0]] eq $rightLetterOk) {
 | 
| 
 | 
  1830 				if ($coord_end_chr2_cluster1 >= $coord_end_chr2_cluster2) {
 | 
| 
 | 
  1831 				    $break_pont_chr2 = '('.max(($coord_end_chr2_cluster2-$maxFragmentLength),1).','.$coord_start_chr2_cluster2.')';	
 | 
| 
 | 
  1832 				    $break_pont_chr2 .= ',('.$coord_end_chr2_cluster1.','.min(($coord_start_chr2_cluster1+$maxFragmentLength),$maxCoord2).')';
 | 
| 
 | 
  1833 				    $signatureType = "INV_INS_FRAGMT";
 | 
| 
 | 
  1834 				} else {
 | 
| 
 | 
  1835 				    $break_pont_chr2 = '('.$coord_end_chr2_cluster1.','.$coord_start_chr2_cluster2.')';
 | 
| 
 | 
  1836 				    $signatureType = "INV_TRANSLOC";
 | 
| 
 | 
  1837 				}	
 | 
| 
 | 
  1838 			    } else {
 | 
| 
 | 
  1839 				#should not occur
 | 
| 
 | 
  1840 				print STDERR "\nError in orderFiltering\n\n";				    
 | 
| 
 | 
  1841 			    }
 | 
| 
 | 
  1842 			}
 | 
| 
 | 
  1843 			
 | 
| 
 | 
  1844 			elsif ($generalStrand1[$index1[0]] eq $rightLetterOk && $generalStrand1[$index2[0]] eq $leftLetterOk) { #(F,R)
 | 
| 
 | 
  1845 			    $break_pont_chr1 = '('.max(($coord_end_chr1_cluster1-$maxFragmentLength),1).','.$coord_start_chr1_cluster1.')';	
 | 
| 
 | 
  1846 			    $break_pont_chr1 .= ',('.$coord_end_chr1_cluster2.','.min(($coord_start_chr1_cluster2+$maxFragmentLength),$maxCoord1).')';				
 | 
| 
 | 
  1847 			    if ($generalStrand2[$index1[0]] eq $rightLetterOk && $generalStrand2[$index2[0]] eq $leftLetterOk) { 
 | 
| 
 | 
  1848 				if ($coord_end_chr2_cluster1 >= $coord_end_chr2_cluster2) {
 | 
| 
 | 
  1849 				    $break_pont_chr2 = '('.$coord_end_chr2_cluster2.','.$coord_start_chr2_cluster1.')';
 | 
| 
 | 
  1850 				    $signatureType = "INV_INS_FRAGMT";
 | 
| 
 | 
  1851 				} else {
 | 
| 
 | 
  1852 				    $break_pont_chr2 = '('.max(($coord_end_chr2_cluster1-$maxFragmentLength),1).','.$coord_start_chr2_cluster1.')';	
 | 
| 
 | 
  1853 				    $break_pont_chr2 .= ',('.$coord_end_chr2_cluster2.','.min(($coord_start_chr2_cluster2+$maxFragmentLength),$maxCoord2).')';
 | 
| 
 | 
  1854 				    $signatureType = "INV_COAMPLICON";
 | 
| 
 | 
  1855 				}		    
 | 
| 
 | 
  1856 				
 | 
| 
 | 
  1857 			    } elsif ($generalStrand2[$index1[0]] eq $leftLetterOk && $generalStrand2[$index2[0]] eq $rightLetterOk) {
 | 
| 
 | 
  1858 				if ($coord_end_chr2_cluster1 >= $coord_end_chr2_cluster2) {
 | 
| 
 | 
  1859 				    $break_pont_chr2 = '('.max(($coord_end_chr2_cluster2-$maxFragmentLength),1).','.$coord_start_chr2_cluster2.')';	
 | 
| 
 | 
  1860 				    $break_pont_chr2 .= ',('.$coord_end_chr2_cluster1.','.min(($coord_start_chr2_cluster1+$maxFragmentLength),$maxCoord2).')';
 | 
| 
 | 
  1861 				    $signatureType = "COAMPLICON";
 | 
| 
 | 
  1862 				} else {
 | 
| 
 | 
  1863 				    $break_pont_chr2 = '('.$coord_end_chr2_cluster1.','.$coord_start_chr2_cluster2.')';
 | 
| 
 | 
  1864 				    $signatureType = "INS_FRAGMT";
 | 
| 
 | 
  1865 				}	
 | 
| 
 | 
  1866 			    } else {
 | 
| 
 | 
  1867 				#should not occur
 | 
| 
 | 
  1868 				$signatureType = "UNDEFINED";
 | 
| 
 | 
  1869 			    }
 | 
| 
 | 
  1870 			}			    
 | 
| 
 | 
  1871 			else { # (F,F) or (R,R)  something strange. We will discard the smallest cluster
 | 
| 
 | 
  1872 			    $ifBalanced = 'UNBAL';				
 | 
| 
 | 
  1873 			    if (scalar keys %{$secondTable} > scalar keys %{$table}) {
 | 
| 
 | 
  1874 				($table,$secondTable)=($secondTable,$table);		#this means that one needs to exchange cluster1 with cluster2
 | 
| 
 | 
  1875 				
 | 
| 
 | 
  1876 				$coord_start_chr1_cluster1 = $coord_start_chr1_cluster2;
 | 
| 
 | 
  1877 				$coord_end_chr1_cluster1 = $coord_end_chr1_cluster2;   
 | 
| 
 | 
  1878 				$coord_start_chr2_cluster1 = $coord_start_chr2_cluster2;
 | 
| 
 | 
  1879 				$coord_end_chr2_cluster1 = $coord_end_chr2_cluster2;
 | 
| 
 | 
  1880 				$clusterCoordinates_chr1 = '('.$coord_start_chr1_cluster1.','.$coord_end_chr1_cluster1.')';
 | 
| 
 | 
  1881 				$clusterCoordinates_chr2 = '('.$coord_start_chr2_cluster1.','.$coord_end_chr2_cluster1.')';			    
 | 
| 
 | 
  1882 			    }
 | 
| 
 | 
  1883 			    $reads_left = scalar keys %{$table};
 | 
| 
 | 
  1884 			}
 | 
| 
 | 
  1885 			if ($ifBalanced eq 'BAL') {
 | 
| 
 | 
  1886 			    $ifRenv = $signatureType;
 | 
| 
 | 
  1887 			}	
 | 
| 
 | 
  1888 		    }
 | 
| 
 | 
  1889 		}
 | 
| 
 | 
  1890 	    }   
 | 
| 
 | 
  1891 	}	
 | 
| 
 | 
  1892 	if ($ifBalanced ne 'BAL') {
 | 
| 
 | 
  1893 	    #define possible break point
 | 
| 
 | 
  1894 	    $coord_start_chr1 = $coord_start_chr1_cluster1;
 | 
| 
 | 
  1895 	    $coord_end_chr1 = $coord_end_chr1_cluster1;
 | 
| 
 | 
  1896 	    $coord_start_chr2 = $coord_start_chr2_cluster1;
 | 
| 
 | 
  1897 	    $coord_end_chr2 = $coord_end_chr2_cluster1;
 | 
| 
 | 
  1898 	    
 | 
| 
 | 
  1899 	    my $region_length_chr1 = $coord_end_chr1-$coord_start_chr1;
 | 
| 
 | 
  1900 	    my $region_length_chr2 = $coord_end_chr2-$coord_start_chr2;
 | 
| 
 | 
  1901 
 | 
| 
 | 
  1902 	    my $leftLetterOk = substr($mate_sense, 0, 1);  #R
 | 
| 
 | 
  1903 	    my $rightLetterOk = substr($mate_sense, 1, 1); #F
 | 
| 
 | 
  1904 	    
 | 
| 
 | 
  1905 	    my @index = keys %{$table};			    
 | 
| 
 | 
  1906 	    unless ($diff_sense_ends) {
 | 
| 
 | 
  1907 		my $firstEndOrder1 = $ends_order1[$index[0]];
 | 
| 
 | 
  1908 		my $firstEndOrder2 = $ends_order2[$index[0]];
 | 
| 
 | 
  1909 	        $break_pont_chr1 = (($strand1[$index[0]] eq 'R' && $firstEndOrder1 == 2) || ($strand1[$index[0]] eq 'F' && $firstEndOrder1 == 1))?'('.$coord_end_chr1.','.min(($coord_start_chr1+$maxFragmentLength),$maxCoord1).')':'('.max(($coord_end_chr1-$maxFragmentLength),1).','.$coord_start_chr1.')';
 | 
| 
 | 
  1910 	        $break_pont_chr2 = (($strand2[$index[0]] eq 'R' && $firstEndOrder2 == 2) || ($strand2[$index[0]] eq 'F' && $firstEndOrder2 == 1))?'('.$coord_end_chr2.','.min(($coord_start_chr2+$maxFragmentLength),$maxCoord2).')':'('.max(($coord_end_chr2-$maxFragmentLength),1).','.$coord_start_chr2.')';	
 | 
| 
 | 
  1911 	    } else {
 | 
| 
 | 
  1912 	        $break_pont_chr1 = ($strand1[$index[0]] eq $leftLetterOk )?'('.$coord_end_chr1.','.min(($coord_start_chr1+$maxFragmentLength),$maxCoord1).')':'('.max(($coord_end_chr1-$maxFragmentLength),1).','.$coord_start_chr1.')';
 | 
| 
 | 
  1913 	        $break_pont_chr2 = ($strand2[$index[0]] eq $leftLetterOk )?'('.$coord_end_chr2.','.min(($coord_start_chr2+$maxFragmentLength),$maxCoord2).')':'('.max(($coord_end_chr2-$maxFragmentLength),1).','.$coord_start_chr2.')';	
 | 
| 
 | 
  1914 	    }
 | 
| 
 | 
  1915 	    
 | 
| 
 | 
  1916 	    if ($chr1 ne $chr2){
 | 
| 
 | 
  1917 		$ifRenv="INV_TRANSLOC" if($ifRenv eq "REVERSE_SENSE");
 | 
| 
 | 
  1918 		$ifRenv="TRANSLOC" if($ifRenv eq "NORMAL_SENSE");
 | 
| 
 | 
  1919 	    }  
 | 
| 
 | 
  1920 	}
 | 
| 
 | 
  1921 	
 | 
| 
 | 
  1922 	if (($ifBalanced eq 'BAL')&&( (scalar keys %{$table}) + (scalar keys %{$secondTable}) < $nb_pairs_threshold)) {
 | 
| 
 | 
  1923 	    next; 								#discard the link
 | 
| 
 | 
  1924 	}	
 | 
| 
 | 
  1925  	if (($ifBalanced eq 'UNBAL')&&(scalar keys %{$table} < $nb_pairs_threshold)) {
 | 
| 
 | 
  1926 	    next; 								#discard the link
 | 
| 
 | 
  1927 	}
 | 
| 
 | 
  1928 	my $ratioTxt = "$reads_left/".(scalar @pairs);
 | 
| 
 | 
  1929 	my ($n1,$nTot) = split ("/",$strand_ratio_filtering);
 | 
| 
 | 
  1930 	my $ratioReal = $reads_left/$nTot;    
 | 
| 
 | 
  1931 	
 | 
| 
 | 
  1932 	if ($coord_start_chr1<=0) {
 | 
| 
 | 
  1933 	    $coord_start_chr1=1;
 | 
| 
 | 
  1934 	}
 | 
| 
 | 
  1935 	if ($coord_start_chr2<=0) {
 | 
| 
 | 
  1936 	    $coord_start_chr2=1;
 | 
| 
 | 
  1937 	}
 | 
| 
 | 
  1938 	#create output	
 | 
| 
 | 
  1939 	my @link=($chr->{$chr1}->{name}, $coord_start_chr1 , $coord_end_chr1,	#all information output
 | 
| 
 | 
  1940 		  $chr->{$chr2}->{name}, $coord_start_chr2 , $coord_end_chr2,
 | 
| 
 | 
  1941 		  $reads_left,
 | 
| 
 | 
  1942 		  &redraw(1,$table,$secondTable,\%badInFRSense,$ifBalanced,\@pairs),
 | 
| 
 | 
  1943 		  &redraw(1,$table,$secondTable,\%badInFRSense,$ifBalanced,\@strand1),
 | 
| 
 | 
  1944 		  &redraw(1,$table,$secondTable,\%badInFRSense,$ifBalanced,\@strand2),
 | 
| 
 | 
  1945 		  &redraw(1,$table,$secondTable,\%badInFRSense,$ifBalanced,\@ends_order1),
 | 
| 
 | 
  1946 		  &redraw(1,$table,$secondTable,\%badInFRSense,$ifBalanced,\@ends_order2),
 | 
| 
 | 
  1947 		  &redraw(2,$table,$secondTable,\%badInFRSense,$ifBalanced,\@order1),
 | 
| 
 | 
  1948 		  &redraw(2,$table,$secondTable,\%badInFRSense,$ifBalanced,\@order2),
 | 
| 
 | 
  1949 		  &redraw(1,$table,$secondTable,\%badInFRSense,$ifBalanced,\@positions1),
 | 
| 
 | 
  1950 		  &redraw(1,$table,$secondTable,\%badInFRSense,$ifBalanced,\@positions2),
 | 
| 
 | 
  1951 		  $ifRenv,
 | 
| 
 | 
  1952 		  $strand_ratio_filtering,
 | 
| 
 | 
  1953 		  $ifBalanced, $ratioTxt, $break_pont_chr1, $break_pont_chr2,
 | 
| 
 | 
  1954 		  $ratioReal, $nTot);
 | 
| 
 | 
  1955 	
 | 
| 
 | 
  1956 	$nb_links++;
 | 
| 
 | 
  1957 	print $fh2 join("\t",@link)."\n";
 | 
| 
 | 
  1958 	
 | 
| 
 | 
  1959 	if($record>=$warn){
 | 
| 
 | 
  1960 	    print LOG "-- $fchr : $warn links analysed - $nb_links links kept\n";
 | 
| 
 | 
  1961 	    $warn+=10000;
 | 
| 
 | 
  1962 	}
 | 
| 
 | 
  1963 	
 | 
| 
 | 
  1964     }
 | 
| 
 | 
  1965     $fh->close;
 | 
| 
 | 
  1966     $fh2->close;
 | 
| 
 | 
  1967     
 | 
| 
 | 
  1968     print LOG "-- $fchr : Total : $record links analysed - $nb_links links kept\n";
 | 
| 
 | 
  1969 
 | 
| 
 | 
  1970 }
 | 
| 
 | 
  1971 #------------------------------------------------------------------------------#
 | 
| 
 | 
  1972 #------------------------------------------------------------------------------#
 | 
| 
 | 
  1973 #gets information about ends positions given start, direction and order
 | 
| 
 | 
  1974 sub getEnds {
 | 
| 
 | 
  1975     my ($ends,$starts,$strand,$end_order,$tag_length) = @_;
 | 
| 
 | 
  1976     for my $i (0..scalar(@{$starts})-1) {
 | 
| 
 | 
  1977 	$ends->[$i] = getEnd($starts->[$i],$strand->[$i],$end_order->[$i],$tag_length);
 | 
| 
 | 
  1978     }    
 | 
| 
 | 
  1979 }
 | 
| 
 | 
  1980 sub getEnd {
 | 
| 
 | 
  1981     my ($start,$strand, $end_order,$tag_length) = @_;       
 | 
| 
 | 
  1982     return ($strand eq 'F')? $start+$tag_length->{$end_order}-1:$start-$tag_length->{$end_order}+1;
 | 
| 
 | 
  1983 }
 | 
| 
 | 
  1984 #------------------------------------------------------------------------------#
 | 
| 
 | 
  1985 #------------------------------------------------------------------------------#
 | 
| 
 | 
  1986 #gets starts and ends Coords when start=leftmost given positions, directions and orders
 | 
| 
 | 
  1987 sub getCoordswithLeftMost {
 | 
| 
 | 
  1988     
 | 
| 
 | 
  1989     my ($starts,$ends,$positions,$strand,$end_order,$tag_length) = @_;
 | 
| 
 | 
  1990 
 | 
| 
 | 
  1991     for my $i (0..scalar(@{$positions})-1) {
 | 
| 
 | 
  1992 
 | 
| 
 | 
  1993 	if($strand->[$i] eq 'F'){
 | 
| 
 | 
  1994 	    $starts->[$i]=$positions->[$i];
 | 
| 
 | 
  1995 	    $ends->[$i]=$positions->[$i]+$tag_length->{$end_order->[$i]}-1;
 | 
| 
 | 
  1996 	}else{
 | 
| 
 | 
  1997 	    $starts->[$i]=$positions->[$i]-$tag_length->{$end_order->[$i]}+1;
 | 
| 
 | 
  1998 	    $ends->[$i]=$positions->[$i];
 | 
| 
 | 
  1999 	}
 | 
| 
 | 
  2000     }    
 | 
| 
 | 
  2001 }
 | 
| 
 | 
  2002 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2003 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2004 sub addInsertionInfo { 								#add field with INS,DEL,NA and distance between clusters and performs filtering 
 | 
| 
 | 
  2005     
 | 
| 
 | 
  2006     my ($chr,$chrID,$nb_pairs_threshold,$order_filtering,$indel_sigma_threshold,$dup_sigma_threshold,$singleton_sigma_threshold,$mu,$sigma,$mate_sense,$tag_length,$links_file,$flinks_file)=@_;
 | 
| 
 | 
  2007 
 | 
| 
 | 
  2008     my @sfile=split(/\./,$links_file);
 | 
| 
 | 
  2009     my $fchr=$sfile[$#sfile-2];
 | 
| 
 | 
  2010     
 | 
| 
 | 
  2011 
 | 
| 
 | 
  2012     my $diff_sense_ends=(($mate_sense eq "FR") || ($mate_sense eq "RF"))? 1:0; 
 | 
| 
 | 
  2013     
 | 
| 
 | 
  2014     my $record=0;
 | 
| 
 | 
  2015     my $nb_links=0;
 | 
| 
 | 
  2016     my $warn=10000;
 | 
| 
 | 
  2017     
 | 
| 
 | 
  2018     print LOG "\# Filtering out normal pairs using insert size...\n";
 | 
| 
 | 
  2019     print LOG "-- mu length=$mu, sigma length=$sigma, indel sigma threshold=$indel_sigma_threshold, dup sigma threshold=$dup_sigma_threshold\n";
 | 
| 
 | 
  2020     print LOG "-- using ".($mu-$indel_sigma_threshold*$sigma)."-".
 | 
| 
 | 
  2021 		      ($mu+$indel_sigma_threshold*$sigma)." as normal range of insert size for indels\n";
 | 
| 
 | 
  2022     print LOG "-- using ".($mu-$dup_sigma_threshold*$sigma)."-".
 | 
| 
 | 
  2023 		      ($mu+$dup_sigma_threshold*$sigma)." as normal range of insert size for duplications\n";
 | 
| 
 | 
  2024     print LOG "-- using ".($mu-$singleton_sigma_threshold*$sigma)." as the upper limit of insert size for singletons\n" if($mate_sense eq "RF");
 | 
| 
 | 
  2025 
 | 
| 
 | 
  2026     my $fh = new FileHandle;
 | 
| 
 | 
  2027     my $fh2 = new FileHandle;
 | 
| 
 | 
  2028     
 | 
| 
 | 
  2029     $fh->open("<$links_file") or die "$0: can't open $links_file :$!\n";
 | 
| 
 | 
  2030     $fh2->open(">$flinks_file") or die "$0: can't write in the output: $flinks_file :$!\n";
 | 
| 
 | 
  2031     
 | 
| 
 | 
  2032     while(<$fh>){
 | 
| 
 | 
  2033 	
 | 
| 
 | 
  2034 	$record++; 
 | 
| 
 | 
  2035 	my @t = split;
 | 
| 
 | 
  2036 	my ($chr1,$chr2,$mates_list)=@t[0,3,7];
 | 
| 
 | 
  2037 	
 | 
| 
 | 
  2038 	if($chrID->{$chr1} ne $chrID->{$chr2}) {				#if inter-chromosomal link here (because sv_type=all), 
 | 
| 
 | 
  2039 	    $nb_links++;
 | 
| 
 | 
  2040 	    
 | 
| 
 | 
  2041 	    $t[16]="INV_TRANSLOC" if($t[16] eq "REVERSE_SENSE");
 | 
| 
 | 
  2042 	    $t[16]="TRANSLOC" if($t[16] eq "NORMAL_SENSE");
 | 
| 
 | 
  2043 	    
 | 
| 
 | 
  2044 	    $t[16].= "\t";
 | 
| 
 | 
  2045 	    $t[19].= "\t";
 | 
| 
 | 
  2046 	    
 | 
| 
 | 
  2047 	    print $fh2 join("\t",@t)."\n";
 | 
| 
 | 
  2048 	    
 | 
| 
 | 
  2049 	    if($record>=$warn){
 | 
| 
 | 
  2050 		print LOG "-- $fchr : $warn links processed - $nb_links links kept\n";
 | 
| 
 | 
  2051 		$warn+=10000;
 | 
| 
 | 
  2052 	    }								
 | 
| 
 | 
  2053 	    next;								
 | 
| 
 | 
  2054 	}									
 | 
| 
 | 
  2055 	
 | 
| 
 | 
  2056 	my $ifRenv = $t[16];
 | 
| 
 | 
  2057 	my $ifBalanced = "UNBAL";
 | 
| 
 | 
  2058 	$ifBalanced = $t[18] if ($order_filtering);
 | 
| 
 | 
  2059 		
 | 
| 
 | 
  2060 	my $numberOfPairs = $t[6];
 | 
| 
 | 
  2061 	my @positions1 = deleteBadOrderSensePairs(split (/,/,$t[14]));
 | 
| 
 | 
  2062 	my @positions2 = deleteBadOrderSensePairs(split (/,/,$t[15]));
 | 
| 
 | 
  2063 	
 | 
| 
 | 
  2064 	if ($ifBalanced eq "BAL") {
 | 
| 
 | 
  2065 	    
 | 
| 
 | 
  2066 	    if ($ifRenv eq "INV_TRANSLOC") {
 | 
| 
 | 
  2067 		$ifRenv = "INV_FRAGMT"; #for intrachromosomal inverted translocation is the same as inverted fragment
 | 
| 
 | 
  2068 	    }
 | 
| 
 | 
  2069 	    if ($ifRenv eq "NORMAL_SENSE") {
 | 
| 
 | 
  2070 		$ifRenv = "TRANSLOC"; 
 | 
| 
 | 
  2071 	    }
 | 
| 
 | 
  2072 	    if ($ifRenv eq "REVERSE_SENSE") {
 | 
| 
 | 
  2073 		$ifRenv = "INV_FRAGMT"; #for intrachromosomal inverted translocation is the same as inverted fragment
 | 
| 
 | 
  2074 	    }
 | 
| 
 | 
  2075 	    $t[19].= "\t";
 | 
| 
 | 
  2076 	    
 | 
| 
 | 
  2077 	    my $meanDistance = 0;
 | 
| 
 | 
  2078 	    
 | 
| 
 | 
  2079 	    for my $i (0..$numberOfPairs-1) {
 | 
| 
 | 
  2080 		$meanDistance += $positions2[$i]-$positions1[$i];
 | 
| 
 | 
  2081 	    }
 | 
| 
 | 
  2082 	    $meanDistance /= $numberOfPairs;      
 | 
| 
 | 
  2083 	    
 | 
| 
 | 
  2084 	    $t[16] = $ifRenv."\t".$meanDistance;
 | 
| 
 | 
  2085 	    #dont touch the annotation. It should be already OK.
 | 
| 
 | 
  2086 	    
 | 
| 
 | 
  2087 	} else {
 | 
| 
 | 
  2088 	    #only for unbalanced
 | 
| 
 | 
  2089 	    
 | 
| 
 | 
  2090 	    my $ifoverlap=overlap($t[1],$t[2],$t[4],$t[5]);
 | 
| 
 | 
  2091 	    
 | 
| 
 | 
  2092 	    my $ends_sense_class = (deleteBadOrderSensePairs(split (/,/,$t[8])))[0].
 | 
| 
 | 
  2093 				   (deleteBadOrderSensePairs(split (/,/,$t[9])))[0];
 | 
| 
 | 
  2094 	    my $ends_order_class = (deleteBadOrderSensePairs(split (/,/,$t[10])))[0].
 | 
| 
 | 
  2095 				   (deleteBadOrderSensePairs(split (/,/,$t[11])))[0];
 | 
| 
 | 
  2096 	    
 | 
| 
 | 
  2097 	    my $indel_type = $ifRenv;
 | 
| 
 | 
  2098 	    
 | 
| 
 | 
  2099 	    my $meanDistance = "N/A";
 | 
| 
 | 
  2100 	    
 | 
| 
 | 
  2101 	    ($meanDistance, $indel_type) = checkIndel ($numberOfPairs,		#identify insertion type for rearrangments without inversion, calculates distance between cluster
 | 
| 
 | 
  2102 						       \@positions1,		#assign N/A to $indel_type if unknown
 | 
| 
 | 
  2103 						       \@positions2,
 | 
| 
 | 
  2104 						       $ifRenv,
 | 
| 
 | 
  2105 						       $ifoverlap,
 | 
| 
 | 
  2106 						       $indel_sigma_threshold,
 | 
| 
 | 
  2107 						       $dup_sigma_threshold,
 | 
| 
 | 
  2108 						       $singleton_sigma_threshold,
 | 
| 
 | 
  2109 						       $mu,
 | 
| 
 | 
  2110 						       $sigma,
 | 
| 
 | 
  2111 						       $ifBalanced,
 | 
| 
 | 
  2112 						       $ends_sense_class,
 | 
| 
 | 
  2113 						       $ends_order_class,
 | 
| 
 | 
  2114 						       $mate_sense,
 | 
| 
 | 
  2115 						       $diff_sense_ends,
 | 
| 
 | 
  2116 						       );
 | 
| 
 | 
  2117 	    
 | 
| 
 | 
  2118 	    #filtering of pairs with distance inconsistant with the SV
 | 
| 
 | 
  2119 	    if ($ifRenv ne "REVERSE_SENSE") {
 | 
| 
 | 
  2120 		my $maxCoord1 =$chr->{$chrID->{$chr1}}->{length};	
 | 
| 
 | 
  2121 		my $maxCoord2 =$chr->{$chrID->{$chr2}}->{length};
 | 
| 
 | 
  2122 		$meanDistance = recalc_t_usingInsertSizeInfo(\@t,$mu,$sigma,$meanDistance,$tag_length,$diff_sense_ends,$mate_sense,
 | 
| 
 | 
  2123 							     $maxCoord1,$maxCoord2,$ends_sense_class,$ends_order_class,$nb_pairs_threshold,$order_filtering);
 | 
| 
 | 
  2124 		next if ($t[6] < $nb_pairs_threshold);
 | 
| 
 | 
  2125 	    }else{
 | 
| 
 | 
  2126 		 $t[19].= "\t";
 | 
| 
 | 
  2127 	    }
 | 
| 
 | 
  2128 	    $t[16] = $indel_type."\t".$meanDistance;
 | 
| 
 | 
  2129 	}
 | 
| 
 | 
  2130 	
 | 
| 
 | 
  2131 	$nb_links++;
 | 
| 
 | 
  2132 		
 | 
| 
 | 
  2133 	print $fh2 join("\t",@t)."\n";
 | 
| 
 | 
  2134 	if($record>=$warn){
 | 
| 
 | 
  2135 	     print LOG "-- $fchr : $warn links processed - $nb_links links kept\n";
 | 
| 
 | 
  2136 	    $warn+=10000;
 | 
| 
 | 
  2137 	}
 | 
| 
 | 
  2138     }
 | 
| 
 | 
  2139     $fh->close;
 | 
| 
 | 
  2140     $fh2->close; 
 | 
| 
 | 
  2141     
 | 
| 
 | 
  2142     print LOG "-- $fchr : Total : $record links analysed - $nb_links links kept\n";
 | 
| 
 | 
  2143     
 | 
| 
 | 
  2144 }
 | 
| 
 | 
  2145 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2146 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2147 sub checkIndel  {
 | 
| 
 | 
  2148     
 | 
| 
 | 
  2149     my ($numberOfPairs,	$positions1, $positions2, $ifRenv, $ifoverlap, $indel_sigma_threshold, $dup_sigma_threshold, $singleton_sigma_threshold,
 | 
| 
 | 
  2150 	$mu, $sigma, $ifBalanced,$ends_sense_class,$ends_order_class,$mate_sense,$diff_sense_ends) = @_;
 | 
| 
 | 
  2151     
 | 
| 
 | 
  2152     my $meanDistance = 0;
 | 
| 
 | 
  2153     
 | 
| 
 | 
  2154     for my $i (0..$numberOfPairs-1) {
 | 
| 
 | 
  2155 	$meanDistance += $positions2->[$i]-$positions1->[$i];
 | 
| 
 | 
  2156     }
 | 
| 
 | 
  2157     $meanDistance /= $numberOfPairs;
 | 
| 
 | 
  2158     
 | 
| 
 | 
  2159     return ($meanDistance,"INV_DUPLI") if (($ifRenv eq "REVERSE_SENSE") && ($meanDistance<$mu+$dup_sigma_threshold*$sigma) );
 | 
| 
 | 
  2160     
 | 
| 
 | 
  2161     return ($meanDistance,"INVERSION") if ($ifRenv eq "REVERSE_SENSE");
 | 
| 
 | 
  2162     
 | 
| 
 | 
  2163     if($diff_sense_ends){
 | 
| 
 | 
  2164 	return ($meanDistance, "LARGE_DUPLI") if ($ends_sense_class ne $mate_sense) && ($meanDistance>$mu+$dup_sigma_threshold*$sigma) ;
 | 
| 
 | 
  2165 	return ($meanDistance, "SINGLETON") if (($meanDistance<$mu-$singleton_sigma_threshold*$sigma) && $mate_sense eq "RF" && ($ends_sense_class eq inverseSense($mate_sense)));
 | 
| 
 | 
  2166     }else{
 | 
| 
 | 
  2167 	return ($meanDistance, "LARGE_DUPLI") if (($ends_sense_class eq $mate_sense) && ($ends_order_class eq "12") || ($ends_sense_class eq inverseSense($mate_sense)) && ($ends_order_class eq "21")) &&
 | 
| 
 | 
  2168 	($meanDistance>$mu+$dup_sigma_threshold*$sigma) ; 
 | 
| 
 | 
  2169     }
 | 
| 
 | 
  2170  
 | 
| 
 | 
  2171     return ($meanDistance, "SMALL_DUPLI") if (($meanDistance<$mu-$dup_sigma_threshold*$sigma) && $ifoverlap);
 | 
| 
 | 
  2172     
 | 
| 
 | 
  2173     return ($meanDistance, "DUPLICATION") if ($diff_sense_ends && ($ends_sense_class ne $mate_sense) && ($meanDistance<$mu-$dup_sigma_threshold*$sigma) ) ;
 | 
| 
 | 
  2174     
 | 
| 
 | 
  2175     return ($meanDistance, "INSERTION") if ($meanDistance<$mu -$indel_sigma_threshold*$sigma);
 | 
| 
 | 
  2176     return ($meanDistance, "DELETION") if ($meanDistance>$mu+$indel_sigma_threshold*$sigma);
 | 
| 
 | 
  2177 
 | 
| 
 | 
  2178     return ($meanDistance, "UNDEFINED");
 | 
| 
 | 
  2179 }
 | 
| 
 | 
  2180 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2181 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2182 #sub reacalulate @t so that get rid of unconsistent pairs (unconsistent insert size )
 | 
| 
 | 
  2183 sub recalc_t_usingInsertSizeInfo {
 | 
| 
 | 
  2184     my($t,$mu,$sigma,$meanDistance,$tag_length,$diff_sense_ends,$mate_sense,$maxCoord1,$maxCoord2,$ends_sense_class,$ends_order_class,$nb_pairs_threshold,$order_filtering) = @_;
 | 
| 
 | 
  2185  	    
 | 
| 
 | 
  2186     my @badPairs;
 | 
| 
 | 
  2187 	    
 | 
| 
 | 
  2188     my @positions1 = getAllEntries($t->[14]);
 | 
| 
 | 
  2189     my @positions2 = getAllEntries($t->[15]);	    
 | 
| 
 | 
  2190 	    
 | 
| 
 | 
  2191     if ($meanDistance < $mu) {
 | 
| 
 | 
  2192 	for my $i (0..scalar(@positions1)-1) {
 | 
| 
 | 
  2193 	    if (substr($positions2[$i],-1,1) ne '$' && substr($positions2[$i],-1,1) ne '*' && $positions2[$i]-$positions1[$i]>=$mu) {
 | 
| 
 | 
  2194 	    	push(@badPairs,$i);		    
 | 
| 
 | 
  2195 	    }
 | 
| 
 | 
  2196     	}
 | 
| 
 | 
  2197     } else {
 | 
| 
 | 
  2198 	for my $i (0..scalar(@positions1)-1) {
 | 
| 
 | 
  2199 	    if (substr($positions2[$i],-1,1) ne '$' && substr($positions2[$i],-1,1) ne '*' && $positions2[$i]-$positions1[$i]<=$mu) {
 | 
| 
 | 
  2200 		push(@badPairs,$i);
 | 
| 
 | 
  2201 	    }
 | 
| 
 | 
  2202 	}	    
 | 
| 
 | 
  2203     }
 | 
| 
 | 
  2204 
 | 
| 
 | 
  2205     if (scalar (@badPairs)>0) {
 | 
| 
 | 
  2206 	#print join("\t",@badPairs).": ".join("\t",@t)."\n";	        
 | 
| 
 | 
  2207 	#remove these inconsistant links
 | 
| 
 | 
  2208 	$t->[6] -= scalar(@badPairs); #numberOfPairs
 | 
| 
 | 
  2209 	return if ($t->[6] < $nb_pairs_threshold); 	
 | 
| 
 | 
  2210 
 | 
| 
 | 
  2211 	$t->[7] = mark_values(\@badPairs, $t->[7]);
 | 
| 
 | 
  2212 	$t->[8] = mark_values(\@badPairs, $t->[8]);
 | 
| 
 | 
  2213 	$t->[9] = mark_values(\@badPairs, $t->[9]);
 | 
| 
 | 
  2214 	$t->[10] = mark_values(\@badPairs, $t->[10]);
 | 
| 
 | 
  2215 	$t->[11] = mark_values(\@badPairs, $t->[11]);
 | 
| 
 | 
  2216 	
 | 
| 
 | 
  2217 	$t->[12] = mark_indexes(\@badPairs, $t->[12]);
 | 
| 
 | 
  2218 	$t->[13] = mark_indexes(\@badPairs, $t->[13]);	    
 | 
| 
 | 
  2219 		    
 | 
| 
 | 
  2220 	$t->[14] = mark_values(\@badPairs, $t->[14]);
 | 
| 
 | 
  2221 	$t->[15] = mark_values(\@badPairs, $t->[15]);
 | 
| 
 | 
  2222 	$t->[19] = recalculate_ratio($t->[6],$t->[19]) if ($order_filtering); #add the second ratio
 | 
| 
 | 
  2223 	$t->[17] = recalculate_ratio($t->[6],$t->[17]) unless ($order_filtering);
 | 
| 
 | 
  2224 	($t->[1],$t->[2]) = recalculate_boundaries($t->[14],$t->[8],$t->[10],$tag_length);
 | 
| 
 | 
  2225 	($t->[4],$t->[5]) = recalculate_boundaries($t->[15],$t->[9],$t->[11],$tag_length);	 
 | 
| 
 | 
  2226 		    
 | 
| 
 | 
  2227 	#recalc breakpoints:
 | 
| 
 | 
  2228 	my $quant001 = 3.090232;
 | 
| 
 | 
  2229 	my $maxFragmentLength = &floor($quant001 * $sigma + $mu);
 | 
| 
 | 
  2230 	$t->[20] = recalc_breakpoints($mate_sense,$maxCoord1,$t->[14],substr($ends_sense_class,0,1),substr($ends_order_class,0,1),$t->[1],$t->[2],$maxFragmentLength,$diff_sense_ends ) if ($order_filtering);
 | 
| 
 | 
  2231 	$t->[21] = recalc_breakpoints($mate_sense,$maxCoord2,$t->[15],substr($ends_sense_class,1,1),substr($ends_order_class,1,1),$t->[4],$t->[5],$maxFragmentLength,$diff_sense_ends ) if ($order_filtering);
 | 
| 
 | 
  2232 	#recalc total ratio
 | 
| 
 | 
  2233 	$t->[22] = $t->[6] / $t->[23] if ($order_filtering);
 | 
| 
 | 
  2234 	$t->[18] = $t->[6] / $t->[19] unless ($order_filtering);
 | 
| 
 | 
  2235 	
 | 
| 
 | 
  2236     	@positions1 = deleteBadOrderSensePairs(split (/,/,$t->[14]));
 | 
| 
 | 
  2237     	@positions2 = deleteBadOrderSensePairs(split (/,/,$t->[15]));		
 | 
| 
 | 
  2238 	
 | 
| 
 | 
  2239 	$meanDistance = 0;
 | 
| 
 | 
  2240 	
 | 
| 
 | 
  2241 	for my $i (0..scalar(@positions1)-1) {
 | 
| 
 | 
  2242 	    $meanDistance += $positions2[$i]-$positions1[$i];
 | 
| 
 | 
  2243 	}
 | 
| 
 | 
  2244 	$meanDistance /= scalar(@positions1);
 | 
| 
 | 
  2245  	
 | 
| 
 | 
  2246     } else {
 | 
| 
 | 
  2247 	$t->[17] = recalculate_ratio((split(/\//,$t->[17]))[0],$t->[17]) unless ($order_filtering);
 | 
| 
 | 
  2248 	$t->[19] = recalculate_ratio((split(/\//,$t->[19]))[0],$t->[19]) if ($order_filtering);
 | 
| 
 | 
  2249     
 | 
| 
 | 
  2250     } #nothing has been filtered
 | 
| 
 | 
  2251     return $meanDistance;
 | 
| 
 | 
  2252 }
 | 
| 
 | 
  2253 
 | 
| 
 | 
  2254 sub recalculate_ratio {
 | 
| 
 | 
  2255     my ($left, $ratio) = @_;
 | 
| 
 | 
  2256     my @elements = split (/\//,$ratio);
 | 
| 
 | 
  2257     $elements[1]= $elements[0];
 | 
| 
 | 
  2258     $elements[0]=$left;
 | 
| 
 | 
  2259     return $ratio."\t".join("/",@elements);    
 | 
| 
 | 
  2260 }
 | 
| 
 | 
  2261 
 | 
| 
 | 
  2262 sub recalc_breakpoints {
 | 
| 
 | 
  2263     my ($mate_sense,$maxCoord,$startString,$strand,$firstEndOrder,$coord_start_chr,$coord_end_chr,$maxFragmentLength,$diff_sense_ends ) = @_;
 | 
| 
 | 
  2264     my $break_pont_chr;
 | 
| 
 | 
  2265     
 | 
| 
 | 
  2266     my $leftLetterOk = substr($mate_sense, 0, 1);  #R
 | 
| 
 | 
  2267     my $rightLetterOk = substr($mate_sense, 1, 1); #F	    
 | 
| 
 | 
  2268 
 | 
| 
 | 
  2269 	
 | 
| 
 | 
  2270     my @positions = deleteBadOrderSensePairs(split (/,/,$startString));
 | 
| 
 | 
  2271 
 | 
| 
 | 
  2272     unless ($diff_sense_ends) {
 | 
| 
 | 
  2273 	$break_pont_chr = (($strand eq 'R' && $firstEndOrder == 2) || ($strand eq 'F' && $firstEndOrder == 1))?'('.$coord_end_chr.','.min(($coord_start_chr+$maxFragmentLength),$maxCoord).')':'('.max(($coord_end_chr-$maxFragmentLength),1).','.$coord_start_chr.')';
 | 
| 
 | 
  2274     } else {
 | 
| 
 | 
  2275 	$break_pont_chr = ($strand eq $leftLetterOk)?'('.$coord_end_chr.','.min(($coord_start_chr+$maxFragmentLength),$maxCoord).')':'('.max(($coord_end_chr-$maxFragmentLength),1).','.$coord_start_chr.')';
 | 
| 
 | 
  2276     }    
 | 
| 
 | 
  2277     return $break_pont_chr;
 | 
| 
 | 
  2278 }
 | 
| 
 | 
  2279 sub recalculate_boundaries {
 | 
| 
 | 
  2280     my ($startString,$senseString,$endsOrderString,$tag_length) = @_;
 | 
| 
 | 
  2281     my @positions = deleteBadOrderSensePairs(split (/,/,$startString));
 | 
| 
 | 
  2282     my @strands = deleteBadOrderSensePairs(split (/,/,$senseString));
 | 
| 
 | 
  2283     my @ends_orders = deleteBadOrderSensePairs(split (/,/,$endsOrderString));
 | 
| 
 | 
  2284     my @ends; getEnds(\@ends,\@positions,\@strands,\@ends_orders,$tag_length);   
 | 
| 
 | 
  2285     my $coord_start_cluster = min(min(@positions),min(@ends));
 | 
| 
 | 
  2286     my $coord_end_cluster = max(max(@positions),max(@ends));
 | 
| 
 | 
  2287     return ($coord_start_cluster,$coord_end_cluster);
 | 
| 
 | 
  2288 }
 | 
| 
 | 
  2289 
 | 
| 
 | 
  2290 sub remove_indexes {
 | 
| 
 | 
  2291     my ($bads, $string) = @_;
 | 
| 
 | 
  2292     my @elements = deleteBadOrderSensePairs(split (/,/,$string));
 | 
| 
 | 
  2293     for my $i (reverse sort %{$bads}) {
 | 
| 
 | 
  2294 	delete $elements[$i];
 | 
| 
 | 
  2295     }
 | 
| 
 | 
  2296     return "(".join(",",@elements).")";
 | 
| 
 | 
  2297 }
 | 
| 
 | 
  2298 ##add @ to to elements 
 | 
| 
 | 
  2299 sub mark_values {
 | 
| 
 | 
  2300     my ($bads, $string) = @_;
 | 
| 
 | 
  2301     my @elements = getAllEntries($string);
 | 
| 
 | 
  2302     for my $i (@{$bads}) {
 | 
| 
 | 
  2303 	$elements[$i] .= "@";
 | 
| 
 | 
  2304     }    
 | 
| 
 | 
  2305     return "(".join(",",@elements).")";
 | 
| 
 | 
  2306 }
 | 
| 
 | 
  2307 ##add @ to to indexes
 | 
| 
 | 
  2308 sub mark_indexes {
 | 
| 
 | 
  2309     my ($bads, $string) = @_;
 | 
| 
 | 
  2310     my @elements = getAllEntries($string);
 | 
| 
 | 
  2311     for my $i ((0..scalar(@elements)-1)) {
 | 
| 
 | 
  2312 	for my $j (@{$bads}) {	    
 | 
| 
 | 
  2313 	    $elements[$i] .= "@" if ($elements[$i] eq ($j+1));
 | 
| 
 | 
  2314 	}	   
 | 
| 
 | 
  2315     }
 | 
| 
 | 
  2316 
 | 
| 
 | 
  2317     return "(".join(",",@elements).")";    
 | 
| 
 | 
  2318 }
 | 
| 
 | 
  2319 
 | 
| 
 | 
  2320 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2321 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2322 sub redraw {
 | 
| 
 | 
  2323     
 | 
| 
 | 
  2324     my ($type,$table,$secondTable,$badInFRSense,$ifBalanced,$arr) = @_;	
 | 
| 
 | 
  2325 
 | 
| 
 | 
  2326     my $out;
 | 
| 
 | 
  2327     my @first_arr;
 | 
| 
 | 
  2328     if ($ifBalanced eq 'BAL') {
 | 
| 
 | 
  2329 	my @second_arr;
 | 
| 
 | 
  2330 	my $lastPushed = 1;
 | 
| 
 | 
  2331 	if ($type == 1) {
 | 
| 
 | 
  2332 	    for my $i (0 .. scalar(@{$arr})-1) {
 | 
| 
 | 
  2333 		if (exists ($table->{$i})) {
 | 
| 
 | 
  2334 	    	    push(@first_arr,$arr->[$i]);
 | 
| 
 | 
  2335 		    $lastPushed = 1;
 | 
| 
 | 
  2336 		}elsif (exists ($secondTable->{$i})) {
 | 
| 
 | 
  2337 		    push(@second_arr,$arr->[$i]);
 | 
| 
 | 
  2338 		    $lastPushed = 2;
 | 
| 
 | 
  2339 		} elsif ($lastPushed == 1) {
 | 
| 
 | 
  2340 		    if (exists ($badInFRSense->{$i})) {
 | 
| 
 | 
  2341 		        push(@first_arr,$arr->[$i]."\$");
 | 
| 
 | 
  2342 		    }else {
 | 
| 
 | 
  2343 		        push(@first_arr,$arr->[$i]."*");
 | 
| 
 | 
  2344 		    }
 | 
| 
 | 
  2345 		} elsif ($lastPushed == 2) {
 | 
| 
 | 
  2346 		    if (exists ($badInFRSense->{$i})) {
 | 
| 
 | 
  2347 		        push(@second_arr,$arr->[$i]."\$");
 | 
| 
 | 
  2348 		    }else {
 | 
| 
 | 
  2349 			push(@second_arr,$arr->[$i]."*");
 | 
| 
 | 
  2350 		    }
 | 
| 
 | 
  2351 		} else {print "Error!";exit;}
 | 
| 
 | 
  2352 	    }
 | 
| 
 | 
  2353 	} else {
 | 
| 
 | 
  2354 	     for my $i (@{$arr}) {
 | 
| 
 | 
  2355 		if (exists ($table->{$i-1})) {
 | 
| 
 | 
  2356 	    	    push(@first_arr,$i);
 | 
| 
 | 
  2357 		    $lastPushed = 1;
 | 
| 
 | 
  2358 		}elsif (exists ($secondTable->{$i-1})) {
 | 
| 
 | 
  2359 		    push(@second_arr,$i);
 | 
| 
 | 
  2360 		    $lastPushed = 2;
 | 
| 
 | 
  2361 		} elsif ($lastPushed == 1) {
 | 
| 
 | 
  2362 		    if (exists ($badInFRSense->{$i-1})) {
 | 
| 
 | 
  2363 		        push(@first_arr,$i."\$");
 | 
| 
 | 
  2364 		    }else {
 | 
| 
 | 
  2365 			push(@first_arr,$i."*");
 | 
| 
 | 
  2366 		    }
 | 
| 
 | 
  2367 		} elsif ($lastPushed == 2) {
 | 
| 
 | 
  2368 		    if (exists ($badInFRSense->{$i-1})) {
 | 
| 
 | 
  2369 		        push(@second_arr,$i."\$");
 | 
| 
 | 
  2370 		    }else {
 | 
| 
 | 
  2371 			push(@second_arr,$i."*");
 | 
| 
 | 
  2372 		    }
 | 
| 
 | 
  2373 		} else {print "Error!";exit;}
 | 
| 
 | 
  2374 	    }
 | 
| 
 | 
  2375 	}		
 | 
| 
 | 
  2376 	$out = '('.join(",",@first_arr).'),('.join(",",@second_arr).')';
 | 
| 
 | 
  2377     }
 | 
| 
 | 
  2378     else {
 | 
| 
 | 
  2379 	if ($type == 1) {
 | 
| 
 | 
  2380 	    for my $i (0 .. scalar(@{$arr})-1) { 
 | 
| 
 | 
  2381 		if (exists ($table->{$i})) {
 | 
| 
 | 
  2382 	    	    push(@first_arr,$arr->[$i]);
 | 
| 
 | 
  2383 		} else {
 | 
| 
 | 
  2384 		    if (exists ($badInFRSense->{$i})) {
 | 
| 
 | 
  2385 			push(@first_arr,$arr->[$i]."\$");
 | 
| 
 | 
  2386 		    }else {
 | 
| 
 | 
  2387 		        push(@first_arr,$arr->[$i]."*");
 | 
| 
 | 
  2388 		    }
 | 
| 
 | 
  2389 		}
 | 
| 
 | 
  2390 	    }
 | 
| 
 | 
  2391 	} else {
 | 
| 
 | 
  2392 	    for my $i (@{$arr}) {
 | 
| 
 | 
  2393 		if (exists ($table->{$i-1})) {
 | 
| 
 | 
  2394 	    	    push(@first_arr,$i);
 | 
| 
 | 
  2395 		} else {
 | 
| 
 | 
  2396 		    if (exists ($badInFRSense->{$i-1})) {
 | 
| 
 | 
  2397 			push(@first_arr,$i."\$");
 | 
| 
 | 
  2398 		    }else {
 | 
| 
 | 
  2399 			push(@first_arr,$i."*");
 | 
| 
 | 
  2400 		    }
 | 
| 
 | 
  2401 		}
 | 
| 
 | 
  2402 	    }	    
 | 
| 
 | 
  2403 	}	
 | 
| 
 | 
  2404 	$out = '('.join(",",@first_arr).')';
 | 
| 
 | 
  2405     }    
 | 
| 
 | 
  2406     return $out;
 | 
| 
 | 
  2407 }
 | 
| 
 | 
  2408 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2409 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2410 sub check {
 | 
| 
 | 
  2411     
 | 
| 
 | 
  2412     my $table = $_[0];
 | 
| 
 | 
  2413     my $bad = 'OK';
 | 
| 
 | 
  2414     my $max = 0;  
 | 
| 
 | 
  2415     for my $i (sort {$a<=>$b} keys %{$table}) { 				
 | 
| 
 | 
  2416 	unless ($table->{$i}->{nonAdeq} == 0) {
 | 
| 
 | 
  2417 	    if ($max<$table->{$i}->{nonAdeq}) {
 | 
| 
 | 
  2418 		$max=$table->{$i}->{nonAdeq};
 | 
| 
 | 
  2419 		$bad = $i;
 | 
| 
 | 
  2420 	    }
 | 
| 
 | 
  2421 	}	    
 | 
| 
 | 
  2422     } 
 | 
| 
 | 
  2423     return $bad;
 | 
| 
 | 
  2424 }
 | 
| 
 | 
  2425 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2426 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2427 sub reversed {
 | 
| 
 | 
  2428     
 | 
| 
 | 
  2429     my ($i,$j,$ifRenv,$positions) = @_;
 | 
| 
 | 
  2430     if (($ifRenv eq 'REVERSE_SENSE' && $positions->[$i]<$positions->[$j]) || ($ifRenv ne 'REVERSE_SENSE' && $positions->[$i]>$positions->[$j])){
 | 
| 
 | 
  2431 	return 1;
 | 
| 
 | 
  2432     }
 | 
| 
 | 
  2433     return 0;    
 | 
| 
 | 
  2434 }
 | 
| 
 | 
  2435 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2436 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2437 sub remove {
 | 
| 
 | 
  2438     
 | 
| 
 | 
  2439     my ($bad,$table) = @_;
 | 
| 
 | 
  2440     for my $i (sort {$a<=>$b} keys %{$table}) {
 | 
| 
 | 
  2441 	if ($bad == $i) {
 | 
| 
 | 
  2442 	    delete($table->{$i});;
 | 
| 
 | 
  2443 	} else {
 | 
| 
 | 
  2444 	    if (exists($table->{$i}->{$bad})) {
 | 
| 
 | 
  2445 		delete($table->{$i}->{$bad});
 | 
| 
 | 
  2446 		$table->{$i}->{nonAdeq}--;
 | 
| 
 | 
  2447 	    }
 | 
| 
 | 
  2448 	}
 | 
| 
 | 
  2449     }
 | 
| 
 | 
  2450 }
 | 
| 
 | 
  2451 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2452 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2453 sub findBadInRFSenseSOLiDSolexa { #choose maximum: FFFFs or RRRRs
 | 
| 
 | 
  2454     
 | 
| 
 | 
  2455     my ($strand,$ends_order,$mate_sense,@keysLeft) = @_;
 | 
| 
 | 
  2456     
 | 
| 
 | 
  2457     my $leftLetterOk = substr($mate_sense, 0, 1);  #R
 | 
| 
 | 
  2458     my $rightLetterOk = substr($mate_sense, 1, 1); #F
 | 
| 
 | 
  2459     
 | 
| 
 | 
  2460     my (@standardArray);
 | 
| 
 | 
  2461     if ($leftLetterOk eq $rightLetterOk) { #SOLID mate-pairs
 | 
| 
 | 
  2462 	$leftLetterOk = 'R';
 | 
| 
 | 
  2463 	$rightLetterOk = 'F';
 | 
| 
 | 
  2464 	@standardArray = translateSolidToRF($strand,$ends_order);
 | 
| 
 | 
  2465     } else {
 | 
| 
 | 
  2466 	@standardArray = @{$strand};	
 | 
| 
 | 
  2467     }
 | 
| 
 | 
  2468    
 | 
| 
 | 
  2469     my $ifR = 0;
 | 
| 
 | 
  2470     my @Rs;
 | 
| 
 | 
  2471     
 | 
| 
 | 
  2472     for my $i (@keysLeft) {	
 | 
| 
 | 
  2473 	if ($standardArray[$i] eq $leftLetterOk) {
 | 
| 
 | 
  2474 	    $ifR++;
 | 
| 
 | 
  2475 	    push(@Rs,$i);
 | 
| 
 | 
  2476 	}
 | 
| 
 | 
  2477     }
 | 
| 
 | 
  2478  
 | 
| 
 | 
  2479 
 | 
| 
 | 
  2480     my $ifF = 0;
 | 
| 
 | 
  2481     my @Fs;
 | 
| 
 | 
  2482     
 | 
| 
 | 
  2483     for my $i (@keysLeft) {	
 | 
| 
 | 
  2484 	if ($standardArray[$i] eq $rightLetterOk) {
 | 
| 
 | 
  2485 	    $ifF++;
 | 
| 
 | 
  2486 	    push(@Fs,$i);
 | 
| 
 | 
  2487 	}
 | 
| 
 | 
  2488     }
 | 
| 
 | 
  2489     
 | 
| 
 | 
  2490     if($ifR>=$ifF) {
 | 
| 
 | 
  2491 	return @Fs;
 | 
| 
 | 
  2492     }
 | 
| 
 | 
  2493     return @Rs;     
 | 
| 
 | 
  2494 }
 | 
| 
 | 
  2495 
 | 
| 
 | 
  2496 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2497 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2498 sub findBadInFRSenseSOLiDSolexa { #should work both for SOLiD and Solexa 
 | 
| 
 | 
  2499     
 | 
| 
 | 
  2500     my ($strand1,$strand2,$ends_order1,$ends_order2,$order1,$order2) = ($_[0],$_[1],$_[2],$_[3],$_[4],$_[5]);
 | 
| 
 | 
  2501     my $mate_sense =  $_[6];
 | 
| 
 | 
  2502       
 | 
| 
 | 
  2503     my $leftLetterOk = substr($mate_sense, 0, 1);  #R
 | 
| 
 | 
  2504     my $rightLetterOk = substr($mate_sense, 1, 1); #F
 | 
| 
 | 
  2505     
 | 
| 
 | 
  2506     my (@standardArray1,@standardArray2);
 | 
| 
 | 
  2507    
 | 
| 
 | 
  2508     if ($leftLetterOk eq $rightLetterOk) { #SOLID mate-pairs
 | 
| 
 | 
  2509 	$leftLetterOk = 'R';
 | 
| 
 | 
  2510 	$rightLetterOk = 'F';
 | 
| 
 | 
  2511 	@standardArray1 = translateSolidToRF($strand1,$ends_order1);
 | 
| 
 | 
  2512 	my @arr = getOrderedStrands($strand2,$order2); 
 | 
| 
 | 
  2513 	my @ends2 = getOrderedStrands($ends_order2,$order2); 
 | 
| 
 | 
  2514 	@standardArray2 = translateSolidToRF(\@arr,\@ends2);  
 | 
| 
 | 
  2515 
 | 
| 
 | 
  2516     } else {
 | 
| 
 | 
  2517 	@standardArray1 = @{$strand1};
 | 
| 
 | 
  2518 	@standardArray2 = getOrderedStrands($strand2,$order2);
 | 
| 
 | 
  2519     }
 | 
| 
 | 
  2520    
 | 
| 
 | 
  2521     #we will try 4 possibilities, 2 for each end of the link: RFRR-FFF->RFFFF , RFRR-FFF->RRRFFF
 | 
| 
 | 
  2522    
 | 
| 
 | 
  2523     #for the first end:   
 | 
| 
 | 
  2524    
 | 
| 
 | 
  2525     my @array = @standardArray1; 
 | 
| 
 | 
  2526     my %badInFRSense1;
 | 
| 
 | 
  2527     for my $i (1..scalar (@array)-1){ # FRFRFFFF -> FFFFFF and RRFRFRFFFF -> RRFFFFFF
 | 
| 
 | 
  2528 	if ($array[$i-1] eq $rightLetterOk && $array[$i] eq $leftLetterOk) {
 | 
| 
 | 
  2529 	    $badInFRSense1{$i}=1;
 | 
| 
 | 
  2530 	    $array[$i] = $rightLetterOk;
 | 
| 
 | 
  2531 	}
 | 
| 
 | 
  2532     }    
 | 
| 
 | 
  2533     my $numberRRRFFF_or_FFF_1 = scalar(@array)-scalar(keys %badInFRSense1);
 | 
| 
 | 
  2534     @array = @standardArray1;    
 | 
| 
 | 
  2535     my %badInFRSense0;
 | 
| 
 | 
  2536     for my $i (reverse(1..scalar (@array)-1)){ # FRFRFFFFRR -> FFFFFFRR
 | 
| 
 | 
  2537 	if ($array[$i-1] eq $rightLetterOk && $array[$i] eq $leftLetterOk) {
 | 
| 
 | 
  2538 	    $badInFRSense0{$i-1}=1;
 | 
| 
 | 
  2539 	    $array[$i-1] = $leftLetterOk;
 | 
| 
 | 
  2540 
 | 
| 
 | 
  2541 	}
 | 
| 
 | 
  2542     }
 | 
| 
 | 
  2543     my $numberRRF1 = scalar(@array)-scalar(keys %badInFRSense0);
 | 
| 
 | 
  2544     
 | 
| 
 | 
  2545     #for the second end:   
 | 
| 
 | 
  2546     @array = @standardArray2;
 | 
| 
 | 
  2547  
 | 
| 
 | 
  2548     my %badInFRSense3;
 | 
| 
 | 
  2549     for my $i (1..scalar(@array)-1){
 | 
| 
 | 
  2550 	if ($array[$i-1] eq $rightLetterOk && $array[$i] eq $leftLetterOk) {
 | 
| 
 | 
  2551 	    $badInFRSense3{$order2->[$i]}=1;
 | 
| 
 | 
  2552 	    $array[$i] = $rightLetterOk;
 | 
| 
 | 
  2553 	}
 | 
| 
 | 
  2554     }
 | 
| 
 | 
  2555     my $numberRRRFFF_or_FFF_2 = scalar(@array)-scalar(keys %badInFRSense3);
 | 
| 
 | 
  2556 
 | 
| 
 | 
  2557     @array = @standardArray2;
 | 
| 
 | 
  2558     my %badInFRSense5;
 | 
| 
 | 
  2559     for my $i (reverse(1..scalar (@array)-1)){ # FRFRFFFF -> FFFFFF
 | 
| 
 | 
  2560 	if ($array[$i-1] eq $rightLetterOk && $array[$i] eq $leftLetterOk) {
 | 
| 
 | 
  2561 	    $badInFRSense5{$i-1}=1;
 | 
| 
 | 
  2562 	    $array[$i-1] = $leftLetterOk;
 | 
| 
 | 
  2563 	}
 | 
| 
 | 
  2564     }
 | 
| 
 | 
  2565     my $numberRRF2 = scalar(@array)-scalar(keys %badInFRSense5);  
 | 
| 
 | 
  2566     
 | 
| 
 | 
  2567     if ($numberRRF1>=$numberRRRFFF_or_FFF_1 && $numberRRF1 >= $numberRRRFFF_or_FFF_2 && $numberRRF1 >=$numberRRF2) {
 | 
| 
 | 
  2568 	return (1,%badInFRSense0);
 | 
| 
 | 
  2569     }   
 | 
| 
 | 
  2570     
 | 
| 
 | 
  2571     if ($numberRRRFFF_or_FFF_1 >=$numberRRF1 && $numberRRRFFF_or_FFF_1 >= $numberRRRFFF_or_FFF_2 && $numberRRRFFF_or_FFF_1 >= $numberRRF2) {
 | 
| 
 | 
  2572 	return (1,%badInFRSense1);
 | 
| 
 | 
  2573     }
 | 
| 
 | 
  2574     
 | 
| 
 | 
  2575     if ($numberRRRFFF_or_FFF_2 >= $numberRRF1 && $numberRRRFFF_or_FFF_2 >= $numberRRRFFF_or_FFF_1 && $numberRRRFFF_or_FFF_2 >=$numberRRF2) {
 | 
| 
 | 
  2576 	return (2,%badInFRSense3);
 | 
| 
 | 
  2577     }
 | 
| 
 | 
  2578     
 | 
| 
 | 
  2579     if ($numberRRF2 >=  $numberRRF1 && $numberRRF2 >= $numberRRRFFF_or_FFF_1 && $numberRRF2 >= $numberRRRFFF_or_FFF_2 ) {
 | 
| 
 | 
  2580 	return (2,%badInFRSense5);
 | 
| 
 | 
  2581     }  
 | 
| 
 | 
  2582     
 | 
| 
 | 
  2583     #should not get here:
 | 
| 
 | 
  2584     print STDERR "Error in findBadInFRSenseSOLiDSolexa()!\n";
 | 
| 
 | 
  2585     return (1,%badInFRSense1); 
 | 
| 
 | 
  2586 }
 | 
| 
 | 
  2587 
 | 
| 
 | 
  2588 sub getOrderedStrands {
 | 
| 
 | 
  2589     my ($strand,$order) = ($_[0],$_[1]);
 | 
| 
 | 
  2590     my @arr;
 | 
| 
 | 
  2591     for my $i (0..scalar(@{$strand})-1) {
 | 
| 
 | 
  2592 	push(@arr,$strand->[$order->[$i]-1]); 
 | 
| 
 | 
  2593     }    
 | 
| 
 | 
  2594     return @arr; 
 | 
| 
 | 
  2595 }
 | 
| 
 | 
  2596 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2597 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2598 sub checkClusters {
 | 
| 
 | 
  2599     
 | 
| 
 | 
  2600     my ($ifRenv,$coord_start_chr1_cluster1,$coord_start_chr1_cluster2,$coord_start_chr2_cluster1,$coord_start_chr2_cluster2) = @_;
 | 
| 
 | 
  2601     if ($ifRenv eq 'REVERSE_SENSE') {
 | 
| 
 | 
  2602 	if ($coord_start_chr1_cluster1 <= $coord_start_chr1_cluster2) {
 | 
| 
 | 
  2603 	    return ($coord_start_chr2_cluster1 <= $coord_start_chr2_cluster2)?1:0; 
 | 
| 
 | 
  2604 	}
 | 
| 
 | 
  2605 	return ($coord_start_chr2_cluster1 >= $coord_start_chr2_cluster2)?1:0; 
 | 
| 
 | 
  2606     }
 | 
| 
 | 
  2607     #if NORM
 | 
| 
 | 
  2608     if ($coord_start_chr1_cluster1 <= $coord_start_chr1_cluster2) {
 | 
| 
 | 
  2609         return ($coord_start_chr2_cluster1 >= $coord_start_chr2_cluster2)?1:0; 
 | 
| 
 | 
  2610     }
 | 
| 
 | 
  2611     return ($coord_start_chr2_cluster1 <= $coord_start_chr2_cluster2)?1:0; 
 | 
| 
 | 
  2612 }
 | 
| 
 | 
  2613 
 | 
| 
 | 
  2614 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2615 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2616 sub translateSolidToRF {
 | 
| 
 | 
  2617     my ($strandArr,$ends_orderArr)=@_;
 | 
| 
 | 
  2618     my @array;
 | 
| 
 | 
  2619     for my $i (0..scalar(@{$strandArr})-1) {					
 | 
| 
 | 
  2620 	if ($ends_orderArr->[$i]==1 && $strandArr->[$i] eq 'F') {
 | 
| 
 | 
  2621 	    push(@array,'F');
 | 
| 
 | 
  2622 	}
 | 
| 
 | 
  2623 	if ($ends_orderArr->[$i]==2 && $strandArr->[$i] eq 'F') {
 | 
| 
 | 
  2624 	    push(@array,'R');
 | 
| 
 | 
  2625 	}
 | 
| 
 | 
  2626 	if ($ends_orderArr->[$i]==1 && $strandArr->[$i] eq 'R') {
 | 
| 
 | 
  2627 	    push(@array,'R');   
 | 
| 
 | 
  2628 	}
 | 
| 
 | 
  2629 	if ($ends_orderArr->[$i]==2 && $strandArr->[$i] eq 'R') {
 | 
| 
 | 
  2630 	    push(@array,'F');    
 | 
| 
 | 
  2631 	}
 | 
| 
 | 
  2632     }
 | 
| 
 | 
  2633     return @array;
 | 
| 
 | 
  2634 }
 | 
| 
 | 
  2635 
 | 
| 
 | 
  2636 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2637 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2638 #convert the links file to the circos format
 | 
| 
 | 
  2639 sub links2segdup{
 | 
| 
 | 
  2640     
 | 
| 
 | 
  2641     my($id,$color_code,$links_file,$segdup_file)=@_;
 | 
| 
 | 
  2642     
 | 
| 
 | 
  2643     print LOG "# Converting to the circos format...\n";
 | 
| 
 | 
  2644     
 | 
| 
 | 
  2645     tie (my %hcolor,'Tie::IxHash');						#color-code hash table
 | 
| 
 | 
  2646     foreach my $col (keys %{$color_code}){
 | 
| 
 | 
  2647 	my ($min_links,$max_links)=split(",",$color_code->{$col});
 | 
| 
 | 
  2648 	$hcolor{$col}=[$min_links,$max_links];
 | 
| 
 | 
  2649     }
 | 
| 
 | 
  2650     
 | 
| 
 | 
  2651     open LINKS, "<$links_file" or die "$0: can't open $links_file :$!\n";
 | 
| 
 | 
  2652     open SEGDUP, ">$segdup_file" or die "$0: can't write in the output: $segdup_file :$!\n";
 | 
| 
 | 
  2653     
 | 
| 
 | 
  2654     my $index=1;
 | 
| 
 | 
  2655     while(<LINKS>){
 | 
| 
 | 
  2656 	
 | 
| 
 | 
  2657 	my ($chr1,$start1,$end1,$chr2,$start2,$end2,$count)=(split)[0,1,2,3,4,5,6];
 | 
| 
 | 
  2658 	
 | 
| 
 | 
  2659 	my $color=getColor($count,\%hcolor,"circos");				#get the color-code according the number of links
 | 
| 
 | 
  2660 	
 | 
| 
 | 
  2661 	print SEGDUP "$index\t$id$chr1\t$start1\t$end1\tcolor=$color\n".	#circos output
 | 
| 
 | 
  2662 		     "$index\t$id$chr2\t$start2\t$end2\tcolor=$color\n";
 | 
| 
 | 
  2663 	$index++;
 | 
| 
 | 
  2664     }
 | 
| 
 | 
  2665     
 | 
| 
 | 
  2666     close LINKS;
 | 
| 
 | 
  2667     close SEGDUP;
 | 
| 
 | 
  2668     print LOG "-- output created: $segdup_file\n";
 | 
| 
 | 
  2669 }
 | 
| 
 | 
  2670 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2671 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2672 #convert the links file to the bedPE format for BEDTools usage
 | 
| 
 | 
  2673 sub links2bedPElinksfile{
 | 
| 
 | 
  2674 
 | 
| 
 | 
  2675     my ($sample,$links_file,$bedpe_file)=@_;
 | 
| 
 | 
  2676     
 | 
| 
 | 
  2677     open LINKS, "<$links_file" or die "$0: can't open $links_file :$!\n";
 | 
| 
 | 
  2678     open BEDPE, ">$bedpe_file" or die "$0: can't write in the output: $bedpe_file :$!\n";
 | 
| 
 | 
  2679     
 | 
| 
 | 
  2680     my $nb_links=1;
 | 
| 
 | 
  2681     
 | 
| 
 | 
  2682     while(<LINKS>){
 | 
| 
 | 
  2683 	
 | 
| 
 | 
  2684 	chomp;
 | 
| 
 | 
  2685 	my @t=split("\t",$_);
 | 
| 
 | 
  2686 	my ($chr1,$start1,$end1,$chr2,$start2,$end2)=splice(@t,0,6);
 | 
| 
 | 
  2687 	my $type=($chr1 eq $chr2)? "INTRA":"INTER";
 | 
| 
 | 
  2688 	$type.="_".$t[10];
 | 
| 
 | 
  2689 	
 | 
| 
 | 
  2690 	$start1--; $start2--;
 | 
| 
 | 
  2691 	
 | 
| 
 | 
  2692 	print BEDPE "$chr1\t$start1\t$end1\t$chr2\t$start2\t$end2".
 | 
| 
 | 
  2693 	"\t$sample"."_link$nb_links\t$type\t.\t.".
 | 
| 
 | 
  2694 	"\t".join("|",@t)."\n";
 | 
| 
 | 
  2695 	
 | 
| 
 | 
  2696 	$nb_links++;
 | 
| 
 | 
  2697     }
 | 
| 
 | 
  2698     
 | 
| 
 | 
  2699     close LINKS;
 | 
| 
 | 
  2700     close BEDPE;
 | 
| 
 | 
  2701 
 | 
| 
 | 
  2702 }
 | 
| 
 | 
  2703 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2704 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2705 sub bedPElinks2linksfile{
 | 
| 
 | 
  2706 
 | 
| 
 | 
  2707     my ($bedpe_file,$links_file)=@_;
 | 
| 
 | 
  2708  
 | 
| 
 | 
  2709     open BEDPE, "<$bedpe_file" or die "$0: can't open: $bedpe_file :$!\n";
 | 
| 
 | 
  2710     open LINKS, ">$links_file" or die "$0: can't write in the output $links_file :$!\n";
 | 
| 
 | 
  2711     
 | 
| 
 | 
  2712     while(<BEDPE>){
 | 
| 
 | 
  2713 	
 | 
| 
 | 
  2714 	chomp;
 | 
| 
 | 
  2715 	my $sample=(split("_",(split("\t",$_))[6]))[0];
 | 
| 
 | 
  2716 	my @t1=(split("\t",$_))[0,1,2,3,4,5];
 | 
| 
 | 
  2717 	my @t2=split(/\|/,(split("\t",$_))[10]);
 | 
| 
 | 
  2718 	push(@t2,$sample);
 | 
| 
 | 
  2719 	
 | 
| 
 | 
  2720 	print LINKS join("\t",@t1)."\t".join("\t",@t2)."\n";
 | 
| 
 | 
  2721 	
 | 
| 
 | 
  2722     }
 | 
| 
 | 
  2723     close BEDPE;
 | 
| 
 | 
  2724     close LINKS;
 | 
| 
 | 
  2725 
 | 
| 
 | 
  2726 }
 | 
| 
 | 
  2727 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2728 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2729 #convert the links file to the bed format
 | 
| 
 | 
  2730 sub links2bedfile{
 | 
| 
 | 
  2731     
 | 
| 
 | 
  2732     my ($tag_length,$color_code,$links_file,$bed_file)=@_;
 | 
| 
 | 
  2733     
 | 
| 
 | 
  2734     print LOG "# Converting to the bed format...\n";
 | 
| 
 | 
  2735     
 | 
| 
 | 
  2736     my $compare=1;
 | 
| 
 | 
  2737     if($links_file!~/compared$/){
 | 
| 
 | 
  2738 	$compare=0;
 | 
| 
 | 
  2739 	$tag_length->{none}->{1}=$tag_length->{1};
 | 
| 
 | 
  2740 	$tag_length->{none}->{2}=$tag_length->{2};
 | 
| 
 | 
  2741     }
 | 
| 
 | 
  2742     
 | 
| 
 | 
  2743     #color-code hash table
 | 
| 
 | 
  2744     tie (my %hcolor,'Tie::IxHash');
 | 
| 
 | 
  2745     my %color_order;
 | 
| 
 | 
  2746     my $n=1;
 | 
| 
 | 
  2747     foreach my $col (keys %{$color_code}){
 | 
| 
 | 
  2748 	my ($min_links,$max_links)=split(",",$color_code->{$col});
 | 
| 
 | 
  2749 	$hcolor{$col}=[$min_links,$max_links];
 | 
| 
 | 
  2750 	$color_order{$col}=$n;
 | 
| 
 | 
  2751 	$n++;
 | 
| 
 | 
  2752     }
 | 
| 
 | 
  2753     
 | 
| 
 | 
  2754     my %pair;
 | 
| 
 | 
  2755     my %pt;
 | 
| 
 | 
  2756     $n=1;
 | 
| 
 | 
  2757     open LINKS, "<$links_file" or die "$0: can't open $links_file:$!\n";
 | 
| 
 | 
  2758     
 | 
| 
 | 
  2759     my %str=( "F"=>"+", "R"=>"-" );
 | 
| 
 | 
  2760 
 | 
| 
 | 
  2761     while(<LINKS>){
 | 
| 
 | 
  2762 	
 | 
| 
 | 
  2763 	my @t=split;
 | 
| 
 | 
  2764 	my $sample=($compare)? pop(@t):"none";
 | 
| 
 | 
  2765 	
 | 
| 
 | 
  2766 	my $chr1=$t[0]; 
 | 
| 
 | 
  2767 	my $chr2=$t[3];
 | 
| 
 | 
  2768 	$chr1 = "chr".$chr1 unless ($chr1 =~ m/chr/i);
 | 
| 
 | 
  2769 	$chr2 = "chr".$chr2 unless ($chr2 =~ m/chr/i);
 | 
| 
 | 
  2770 	my $same_chr=($chr1 eq $chr2)? 1:0;
 | 
| 
 | 
  2771 	
 | 
| 
 | 
  2772 	my $count=$t[6];
 | 
| 
 | 
  2773 	my $color=getColor($count,\%hcolor,"bed");
 | 
| 
 | 
  2774 	
 | 
| 
 | 
  2775 	my @pairs=deleteBadOrderSensePairs(split(",",$t[7]));
 | 
| 
 | 
  2776 	my @strand1=deleteBadOrderSensePairs(split(",",$t[8]));
 | 
| 
 | 
  2777 	my @strand2=deleteBadOrderSensePairs(split(",",$t[9]));
 | 
| 
 | 
  2778 	my @ends_order1=deleteBadOrderSensePairs(split(",",$t[10]));
 | 
| 
 | 
  2779 	my @ends_order2=deleteBadOrderSensePairs(split(",",$t[11]));
 | 
| 
 | 
  2780 	my @position1=deleteBadOrderSensePairs(split(",",$t[14]));
 | 
| 
 | 
  2781 	my @position2=deleteBadOrderSensePairs(split(",",$t[15]));
 | 
| 
 | 
  2782 	my @start1; my @end1; getCoordswithLeftMost(\@start1,\@end1,\@position1,\@strand1,\@ends_order1,$tag_length->{$sample});
 | 
| 
 | 
  2783 	my @start2; my @end2; getCoordswithLeftMost(\@start2,\@end2,\@position2,\@strand2,\@ends_order2,$tag_length->{$sample});
 | 
| 
 | 
  2784 
 | 
| 
 | 
  2785 	
 | 
| 
 | 
  2786 	for my $p (0..$#pairs){						
 | 
| 
 | 
  2787 	    
 | 
| 
 | 
  2788 	    if (!exists $pair{$pairs[$p]}){
 | 
| 
 | 
  2789 		
 | 
| 
 | 
  2790 		if($same_chr){
 | 
| 
 | 
  2791 		    
 | 
| 
 | 
  2792 		    $pair{$pairs[$p]}->{0}=[ $chr1, $start1[$p]-1, $end2[$p], $pairs[$p], 0, $str{$strand1[$p]},
 | 
| 
 | 
  2793 				    $start1[$p]-1, $end2[$p], $color,
 | 
| 
 | 
  2794 				    2, $tag_length->{$sample}->{$ends_order1[$p]}.",".$tag_length->{$sample}->{$ends_order2[$p]}, "0,".($start2[$p]-$start1[$p]) ];    
 | 
| 
 | 
  2795 		    $pt{$n}=$pair{$pairs[$p]}->{0};
 | 
| 
 | 
  2796 		    $n++;
 | 
| 
 | 
  2797 		    
 | 
| 
 | 
  2798 		}else{
 | 
| 
 | 
  2799 		    
 | 
| 
 | 
  2800 		    $pair{$pairs[$p]}->{1}=[ $chr1, $start1[$p]-1, $end1[$p] , $pairs[$p]."/1", 0, $str{$strand1[$p]},
 | 
| 
 | 
  2801 						$start1[$p]-1, $end1[$p], $color,
 | 
| 
 | 
  2802 						1, $tag_length->{$sample}->{$ends_order1[$p]}, 0];
 | 
| 
 | 
  2803 		    $pt{$n}=$pair{$pairs[$p]}->{1};
 | 
| 
 | 
  2804 		    $n++;
 | 
| 
 | 
  2805 		    
 | 
| 
 | 
  2806 		    
 | 
| 
 | 
  2807 		    $pair{$pairs[$p]}->{2}=[ $chr2, $start2[$p]-1, $end2[$p], $pairs[$p]."/2", 0, $str{$strand2[$p]},
 | 
| 
 | 
  2808 						$start2[$p]-1, $end2[$p], $color,
 | 
| 
 | 
  2809 						1, $tag_length->{$sample}->{$ends_order2[$p]}, 0];
 | 
| 
 | 
  2810 		    $pt{$n}=$pair{$pairs[$p]}->{2};
 | 
| 
 | 
  2811 		    $n++;
 | 
| 
 | 
  2812 		}
 | 
| 
 | 
  2813 	    }else{
 | 
| 
 | 
  2814 		
 | 
| 
 | 
  2815 		if($same_chr){
 | 
| 
 | 
  2816 		    ${$pair{$pairs[$p]}->{0}}[8]=$color if($color_order{$color}>$color_order{${$pair{$pairs[$p]}->{0}}[8]});
 | 
| 
 | 
  2817 		}else{
 | 
| 
 | 
  2818 		    ${$pair{$pairs[$p]}->{1}}[8]=$color if($color_order{$color}>$color_order{${$pair{$pairs[$p]}->{1}}[8]});
 | 
| 
 | 
  2819 		    ${$pair{$pairs[$p]}->{2}}[8]=$color if($color_order{$color}>$color_order{${$pair{$pairs[$p]}->{2}}[8]});
 | 
| 
 | 
  2820 		}
 | 
| 
 | 
  2821 	    }
 | 
| 
 | 
  2822 	}
 | 
| 
 | 
  2823     }
 | 
| 
 | 
  2824     close LINKS;
 | 
| 
 | 
  2825     
 | 
| 
 | 
  2826     my $nb_pairs=$n-1;
 | 
| 
 | 
  2827     
 | 
| 
 | 
  2828     open BED, ">$bed_file" or die "$0: can't write in the output: $bed_file :$!\n";
 | 
| 
 | 
  2829     print BED "track name=\"$bed_file\" description=\"mate pairs involved in links\" ".
 | 
| 
 | 
  2830 	      "visibility=2 itemRgb=\"On\"\n";
 | 
| 
 | 
  2831     
 | 
| 
 | 
  2832     for my $i (1..$nb_pairs){
 | 
| 
 | 
  2833 	print BED join("\t",@{$pt{$i}})."\n";
 | 
| 
 | 
  2834     }
 | 
| 
 | 
  2835     
 | 
| 
 | 
  2836     close BED;
 | 
| 
 | 
  2837     
 | 
| 
 | 
  2838     print LOG "-- output created: $bed_file\n";
 | 
| 
 | 
  2839     
 | 
| 
 | 
  2840     undef %pair;
 | 
| 
 | 
  2841     undef %pt;
 | 
| 
 | 
  2842     
 | 
| 
 | 
  2843 }
 | 
| 
 | 
  2844 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2845 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2846 sub deleteBadOrderSensePairs{
 | 
| 
 | 
  2847     
 | 
| 
 | 
  2848     my (@tab)=@_;
 | 
| 
 | 
  2849     my @tab2;
 | 
| 
 | 
  2850 
 | 
| 
 | 
  2851     foreach my $v (@tab){
 | 
| 
 | 
  2852 	
 | 
| 
 | 
  2853 	$v=~s/[\(\)]//g;
 | 
| 
 | 
  2854 	push(@tab2,$v) if($v!~/[\$\*\@]$/);
 | 
| 
 | 
  2855     }
 | 
| 
 | 
  2856     return @tab2;
 | 
| 
 | 
  2857 }
 | 
| 
 | 
  2858 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2859 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2860 sub getAllEntries{    
 | 
| 
 | 
  2861     my (@tab)=split (/,/,$_[0]);
 | 
| 
 | 
  2862     my @tab2;
 | 
| 
 | 
  2863 
 | 
| 
 | 
  2864     foreach my $v (@tab){
 | 
| 
 | 
  2865 	
 | 
| 
 | 
  2866 	$v=~s/[\(\)]//g;
 | 
| 
 | 
  2867 	push(@tab2,$v);
 | 
| 
 | 
  2868     }
 | 
| 
 | 
  2869     return @tab2;
 | 
| 
 | 
  2870 }#------------------------------------------------------------------------------#
 | 
| 
 | 
  2871 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2872 sub getAllEntriesWOspecialChar{    
 | 
| 
 | 
  2873     my (@tab)=split (/,/,$_[0]);
 | 
| 
 | 
  2874     my @tab2;
 | 
| 
 | 
  2875 
 | 
| 
 | 
  2876     foreach my $v (@tab){
 | 
| 
 | 
  2877 	
 | 
| 
 | 
  2878 	$v=~s/[\(\)\$\*\@]//g;
 | 
| 
 | 
  2879 	push(@tab2,$v);
 | 
| 
 | 
  2880     }
 | 
| 
 | 
  2881     return @tab2;
 | 
| 
 | 
  2882 }
 | 
| 
 | 
  2883 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2884 #------------------------------------------------------------------------------#
 | 
| 
 | 
  2885 sub links2SVfile{
 | 
| 
 | 
  2886     
 | 
| 
 | 
  2887     my($links_file,$sv_file)=@_;
 | 
| 
 | 
  2888     
 | 
| 
 | 
  2889     print LOG "# Converting to the sv output table...\n";
 | 
| 
 | 
  2890     open LINKS, "<$links_file" or die "$0: can't open $links_file:$!\n";
 | 
| 
 | 
  2891     open SV, ">$sv_file" or die "$0: can't write in the output: $sv_file :$!\n";
 | 
| 
 | 
  2892     
 | 
| 
 | 
  2893     my @header=qw(chr_type SV_type BAL_type chromosome1 start1-end1 average_dist
 | 
| 
 | 
  2894     chromosome2 start2-end2 nb_pairs score_strand_filtering score_order_filtering score_insert_size_filtering
 | 
| 
 | 
  2895     final_score breakpoint1_start1-end1 breakpoint2_start2-end2);
 | 
| 
 | 
  2896     
 | 
| 
 | 
  2897     my $nb_links=0;
 | 
| 
 | 
  2898     
 | 
| 
 | 
  2899     while (<LINKS>){
 | 
| 
 | 
  2900 	
 | 
| 
 | 
  2901 	my @t=split;
 | 
| 
 | 
  2902 	my @sv=();
 | 
| 
 | 
  2903 	my $sv_type="-";
 | 
| 
 | 
  2904 	my $strand_ratio="-";
 | 
| 
 | 
  2905 	my $eq_ratio="-";
 | 
| 
 | 
  2906 	my $eq_type="-";
 | 
| 
 | 
  2907 	my $insert_ratio="-";
 | 
| 
 | 
  2908 	my $link="-";
 | 
| 
 | 
  2909 	my ($bk1, $bk2)=("-","-");
 | 
| 
 | 
  2910 	my $score="-";
 | 
| 
 | 
  2911 	
 | 
| 
 | 
  2912 	my ($chr1,$start1,$end1)=($t[0],$t[1],$t[2]); 
 | 
| 
 | 
  2913 	my ($chr2,$start2,$end2)=($t[3],$t[4],$t[5]);
 | 
| 
 | 
  2914 	my $nb_pairs=$t[6];
 | 
| 
 | 
  2915 	$chr1 = "chr".$chr1 unless ($chr1 =~ m/chr/i);
 | 
| 
 | 
  2916 	$chr2 = "chr".$chr2 unless ($chr2 =~ m/chr/i);
 | 
| 
 | 
  2917 	my $chr_type=($chr1 eq $chr2)? "INTRA":"INTER";
 | 
| 
 | 
  2918 	
 | 
| 
 | 
  2919 	#if strand filtering
 | 
| 
 | 
  2920 	if (defined $t[16]){
 | 
| 
 | 
  2921 	    #if inter-chr link
 | 
| 
 | 
  2922 	    $sv_type=$t[16];
 | 
| 
 | 
  2923 	    if(defined $t[17] && $t[17]=~/^(\d+)\/(\d+)$/){
 | 
| 
 | 
  2924 		$strand_ratio=floor($1/$2*100)."%";
 | 
| 
 | 
  2925 		$score=$t[18];
 | 
| 
 | 
  2926 	    }
 | 
| 
 | 
  2927 	    if(defined $t[18] && $t[18]=~/^(\d+)\/(\d+)$/){
 | 
| 
 | 
  2928 	    #if intra-chr link with insert size filtering
 | 
| 
 | 
  2929 		$strand_ratio=floor($1/$2*100)."%";
 | 
| 
 | 
  2930 		$link=floor($t[17]);
 | 
| 
 | 
  2931 		if($sv_type!~/^INV/){
 | 
| 
 | 
  2932 		    $insert_ratio=floor($1/$2*100)."%" if($t[19]=~/^(\d+)\/(\d+)$/);
 | 
| 
 | 
  2933 		    $score=$t[20];
 | 
| 
 | 
  2934 		}else{
 | 
| 
 | 
  2935 		    $score=$t[19];
 | 
| 
 | 
  2936 		}
 | 
| 
 | 
  2937 	    }
 | 
| 
 | 
  2938 	}
 | 
| 
 | 
  2939 	
 | 
| 
 | 
  2940 	if(defined $t[18] && ($t[18] eq "UNBAL" || $t[18] eq "BAL")){
 | 
| 
 | 
  2941 	    
 | 
| 
 | 
  2942 	    #if strand and order filtering only and/or interchr link
 | 
| 
 | 
  2943 	    $eq_type=$t[18];
 | 
| 
 | 
  2944 	    $eq_ratio=floor($1/$2*100)."%" if($t[19]=~/^(\d+)\/(\d+)$/);
 | 
| 
 | 
  2945 	    ($bk1, $bk2)=($t[20],$t[21]);
 | 
| 
 | 
  2946 	    foreach my $bk ($bk1, $bk2){$bk=~s/\),\(/ /g; $bk=~s/(\(|\))//g; $bk=~s/,/-/g;}
 | 
| 
 | 
  2947 	    $score=$t[22];
 | 
| 
 | 
  2948 	    
 | 
| 
 | 
  2949 	}elsif(defined $t[19] && ($t[19] eq "UNBAL" || $t[19] eq "BAL")){
 | 
| 
 | 
  2950 	    
 | 
| 
 | 
  2951 	    #if all three filtering
 | 
| 
 | 
  2952 	    $link=floor($t[17]);
 | 
| 
 | 
  2953 	    $eq_type=$t[19];
 | 
| 
 | 
  2954 	    $eq_ratio=floor($1/$2*100)."%" if($t[20]=~/^(\d+)\/(\d+)$/);
 | 
| 
 | 
  2955 	    
 | 
| 
 | 
  2956 	    if(defined $t[21] && $t[21]=~/^(\d+)\/(\d+)$/){
 | 
| 
 | 
  2957 		$insert_ratio=floor($1/$2*100)."%";
 | 
| 
 | 
  2958 		($bk1, $bk2)=($t[22],$t[23]);
 | 
| 
 | 
  2959 		$score=$t[24];
 | 
| 
 | 
  2960 		
 | 
| 
 | 
  2961 	    }else{
 | 
| 
 | 
  2962 		($bk1, $bk2)=($t[21],$t[22]);
 | 
| 
 | 
  2963 		$score=$t[23];
 | 
| 
 | 
  2964 	    }
 | 
| 
 | 
  2965 	    foreach my $bk ($bk1, $bk2){$bk=~s/\),\(/ /g; $bk=~s/(\(|\))//g; $bk=~s/,/-/g;}
 | 
| 
 | 
  2966 	    
 | 
| 
 | 
  2967 	}
 | 
| 
 | 
  2968 	
 | 
| 
 | 
  2969 	
 | 
| 
 | 
  2970 	push(@sv, $chr_type, $sv_type,$eq_type);
 | 
| 
 | 
  2971 	push(@sv,"$chr1\t$start1-$end1");
 | 
| 
 | 
  2972 	push(@sv, $link);
 | 
| 
 | 
  2973 	push(@sv,"$chr2\t$start2-$end2",
 | 
| 
 | 
  2974 	     $nb_pairs,$strand_ratio,$eq_ratio,$insert_ratio, decimal($score,4), $bk1, $bk2);
 | 
| 
 | 
  2975 	
 | 
| 
 | 
  2976 	
 | 
| 
 | 
  2977 	print SV join("\t",@sv)."\n";
 | 
| 
 | 
  2978     }
 | 
| 
 | 
  2979     
 | 
| 
 | 
  2980     close LINKS;
 | 
| 
 | 
  2981     close SV;
 | 
| 
 | 
  2982     
 | 
| 
 | 
  2983     system "sort  -k 9,9nr -k 13,13nr $sv_file > $sv_file.sorted";
 | 
| 
 | 
  2984     
 | 
| 
 | 
  2985     open SV, "<".$sv_file.".sorted" or die "$0: can't open in the output: $sv_file".".sorted :$!\n";
 | 
| 
 | 
  2986     my @links=<SV>;
 | 
| 
 | 
  2987     close SV;
 | 
| 
 | 
  2988     
 | 
| 
 | 
  2989     open SV, ">$sv_file" or die "$0: can't write in the output: $sv_file :$!\n";
 | 
| 
 | 
  2990     
 | 
| 
 | 
  2991     print SV join("\t",@header)."\n";
 | 
| 
 | 
  2992     print SV @links;
 | 
| 
 | 
  2993     close SV;
 | 
| 
 | 
  2994     
 | 
| 
 | 
  2995     unlink($sv_file.".sorted");
 | 
| 
 | 
  2996   
 | 
| 
 | 
  2997     print LOG "-- output created: $sv_file\n";
 | 
| 
 | 
  2998     
 | 
| 
 | 
  2999 }
 | 
| 
 | 
  3000 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3001 sub densityCalculation{
 | 
| 
 | 
  3002     
 | 
| 
 | 
  3003     my ($chr,$chrID,$file,$tag_length,$window_dist,$step,$mates_file,$mates_file_ref,$density_file,$input_format)=@_;
 | 
| 
 | 
  3004     
 | 
| 
 | 
  3005     my @sfile=split(/\./,$$mates_file[$file]);
 | 
| 
 | 
  3006     my $fchr=$sfile[$#sfile];
 | 
| 
 | 
  3007     
 | 
| 
 | 
  3008     my $fh = new FileHandle;
 | 
| 
 | 
  3009     
 | 
| 
 | 
  3010     my %density;
 | 
| 
 | 
  3011     my %density_ref;
 | 
| 
 | 
  3012     my @ratio;
 | 
| 
 | 
  3013     my ($cov,$cov_ref);
 | 
| 
 | 
  3014   
 | 
| 
 | 
  3015     #FREQUENCY CALCULATION PROCEDURE
 | 
| 
 | 
  3016     print LOG "# $fchr : Frequency calculation procedure...\n";
 | 
| 
 | 
  3017     &FreqCalculation(\%density,$chr,$chrID,$tag_length,$window_dist,$step,$$mates_file[$file],$input_format);
 | 
| 
 | 
  3018     &FreqCalculation(\%density_ref,$chr,$chrID,$tag_length,$window_dist,$step,$$mates_file_ref[$file],$input_format);
 | 
| 
 | 
  3019    
 | 
| 
 | 
  3020     #MAKING RATIO AND OUTPUT
 | 
| 
 | 
  3021     print LOG "\# Ratio calculation procedure...\n";
 | 
| 
 | 
  3022     $density_file=~s/\/mates\//\/density\//;
 | 
| 
 | 
  3023     $fh->open(">".$density_file) or die "$0: can't write in the output ".$density_file." :$!\n";
 | 
| 
 | 
  3024     
 | 
| 
 | 
  3025     foreach my $k (1..$chr->{nb_chrs}){
 | 
| 
 | 
  3026 	foreach my $frag (1..$chr->{$k}->{nb_frag}){
 | 
| 
 | 
  3027 		
 | 
| 
 | 
  3028 	    @ratio= ($chr->{$k}->{name},
 | 
| 
 | 
  3029 		    (${$chr->{$k}->{$frag}}[0]+1),
 | 
| 
 | 
  3030 		    (${$chr->{$k}->{$frag}}[1]+1));
 | 
| 
 | 
  3031 		
 | 
| 
 | 
  3032 	    $cov=(exists $density{$k}{$frag}->{count})? $density{$k}{$frag}->{count}:0;
 | 
| 
 | 
  3033 	    $cov_ref=(exists $density_ref{$k}{$frag}->{count})? $density_ref{$k}{$frag}->{count}:0;
 | 
| 
 | 
  3034 		
 | 
| 
 | 
  3035 	    push(@ratio,$cov,$cov_ref);
 | 
| 
 | 
  3036 	    push(@ratio,log($cov/$cov_ref)) if($cov && $cov_ref);
 | 
| 
 | 
  3037 	    push(@ratio,-log($cov_ref+1)) if(!$cov && $cov_ref);
 | 
| 
 | 
  3038 	    push(@ratio,log($cov+1)) if($cov && !$cov_ref);
 | 
| 
 | 
  3039 	    next if(!$cov && !$cov_ref);
 | 
| 
 | 
  3040 		
 | 
| 
 | 
  3041 	    print $fh join("\t",@ratio)."\n";
 | 
| 
 | 
  3042 	}
 | 
| 
 | 
  3043     }
 | 
| 
 | 
  3044     
 | 
| 
 | 
  3045     $fh->close;
 | 
| 
 | 
  3046     print LOG "-- output created: $density_file\n";
 | 
| 
 | 
  3047     
 | 
| 
 | 
  3048     undef %density;
 | 
| 
 | 
  3049     undef %density_ref;
 | 
| 
 | 
  3050 }
 | 
| 
 | 
  3051 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3052 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3053 sub FreqCalculation{
 | 
| 
 | 
  3054     
 | 
| 
 | 
  3055     my ($density,$chr,$chrID,$tag_length,$window_dist,$step,$mates_file,$input_format) = @_;
 | 
| 
 | 
  3056     
 | 
| 
 | 
  3057     my @sfile=split(/\./,$mates_file);
 | 
| 
 | 
  3058     my $fchr=$sfile[$#sfile];
 | 
| 
 | 
  3059     my $fh = new FileHandle;
 | 
| 
 | 
  3060     
 | 
| 
 | 
  3061     my $nb_windows=0;
 | 
| 
 | 
  3062     my $warn=100000;
 | 
| 
 | 
  3063     my $record=0;
 | 
| 
 | 
  3064     my %pair;
 | 
| 
 | 
  3065  
 | 
| 
 | 
  3066     my ($sumX,$sumX2) = (0,0);
 | 
| 
 | 
  3067  
 | 
| 
 | 
  3068     print LOG "\# Frequency calculation for $mates_file...\n";
 | 
| 
 | 
  3069  
 | 
| 
 | 
  3070      if ($mates_file =~ /.gz$/) {
 | 
| 
 | 
  3071 	$fh->open("gunzip -c $mates_file |") or die "$0: can't open ".$mates_file.":$!\n";
 | 
| 
 | 
  3072     }elsif($mates_file =~ /.bam$/){
 | 
| 
 | 
  3073 	o$fh->open("$SAMTOOLS_BIN_DIR/samtools view $mates_file |") or die "$0: can't open ".$mates_file.":$!\n";#GALAXY
 | 
| 
 | 
  3074     }else{
 | 
| 
 | 
  3075 	$fh->open("<".$mates_file) or die "$0: can't open ".$mates_file.":$!\n";
 | 
| 
 | 
  3076     }
 | 
| 
 | 
  3077     
 | 
| 
 | 
  3078     while(<$fh>){
 | 
| 
 | 
  3079 	
 | 
| 
 | 
  3080 	my @t=split;
 | 
| 
 | 
  3081 	my $mate=$t[0];
 | 
| 
 | 
  3082 	    
 | 
| 
 | 
  3083 	my ($chr_read1, $chr_read2, $firstbase_read1, $firstbase_read2, $end_order_read1, $end_order_read2,);
 | 
| 
 | 
  3084 	
 | 
| 
 | 
  3085 	next if(exists $pair{$mate});
 | 
| 
 | 
  3086 	
 | 
| 
 | 
  3087 	next if (!readMateFile(\$chr_read1, \$chr_read2, \$firstbase_read1, \$firstbase_read2,\$end_order_read1, \$end_order_read2, \@t, $input_format,$tag_length));
 | 
| 
 | 
  3088 	
 | 
| 
 | 
  3089 	next unless (exists $chrID->{$chr_read1} || exists $chrID->{$chr_read2}); 
 | 
| 
 | 
  3090 	($chr_read1, $chr_read2)= ($chrID->{$chr_read1},$chrID->{$chr_read2});
 | 
| 
 | 
  3091 	
 | 
| 
 | 
  3092 	$pair{$mate}=undef;
 | 
| 
 | 
  3093 	$record++;
 | 
| 
 | 
  3094 	
 | 
| 
 | 
  3095 	my ($coord_start_read1,$coord_end_read1, $coord_start_read2,$coord_end_read2);
 | 
| 
 | 
  3096 	
 | 
| 
 | 
  3097 	recupCoords($firstbase_read1,\$coord_start_read1,\$coord_end_read1,$tag_length->{$end_order_read1},$input_format);
 | 
| 
 | 
  3098 	recupCoords($firstbase_read2,\$coord_start_read2,\$coord_end_read2,$tag_length->{$end_order_read2},$input_format);
 | 
| 
 | 
  3099 	
 | 
| 
 | 
  3100 	my $length = abs($coord_start_read1-$coord_start_read2);
 | 
| 
 | 
  3101 	$sumX += $length;							#add to sum and sum^2 for mean and variance calculation
 | 
| 
 | 
  3102 	$sumX2 += $length*$length;	
 | 
| 
 | 
  3103 	
 | 
| 
 | 
  3104 	for(my $i=1;$i<=$chr->{$chr_read1}->{'nb_frag'};$i++){
 | 
| 
 | 
  3105 	    
 | 
| 
 | 
  3106 	    if (abs ($coord_start_read1-${$chr->{$chr_read1}->{$i}}[0]) <= $window_dist){
 | 
| 
 | 
  3107 		
 | 
| 
 | 
  3108 		&addToDensity($density,$chr_read1,$i,\$nb_windows)
 | 
| 
 | 
  3109 		if(overlap($coord_start_read1,$coord_end_read2,${$chr->{$chr_read1}->{$i}}[0],${$chr->{$chr_read1}->{$i}}[1]));
 | 
| 
 | 
  3110 		
 | 
| 
 | 
  3111 	    }else{
 | 
| 
 | 
  3112 		
 | 
| 
 | 
  3113 		$i=getNextFrag($coord_start_read1,$i,${$chr->{$chr_read1}->{$i}}[0],$chr->{$chr_read1}->{nb_frag},$window_dist,$step);
 | 
| 
 | 
  3114 	    }
 | 
| 
 | 
  3115 	}
 | 
| 
 | 
  3116 	
 | 
| 
 | 
  3117 	if($record>=$warn){
 | 
| 
 | 
  3118 	    print LOG "-- $warn mate-pairs analysed - $nb_windows points created\n";
 | 
| 
 | 
  3119 	    $warn+=100000;
 | 
| 
 | 
  3120 	}
 | 
| 
 | 
  3121     }
 | 
| 
 | 
  3122     $fh->close;
 | 
| 
 | 
  3123 
 | 
| 
 | 
  3124     print LOG "-- $fchr : Total : $record mate-pairs analysed - $nb_windows points created\n";
 | 
| 
 | 
  3125 
 | 
| 
 | 
  3126     if($record>0){
 | 
| 
 | 
  3127 	
 | 
| 
 | 
  3128 	my $mu = $sumX/$record;
 | 
| 
 | 
  3129 	my $sigma = sqrt($sumX2/$record - $mu*$mu);
 | 
| 
 | 
  3130 	print LOG "-- $fchr : mu length = $mu, sigma length = $sigma\n";
 | 
| 
 | 
  3131     }
 | 
| 
 | 
  3132 
 | 
| 
 | 
  3133 }
 | 
| 
 | 
  3134 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3135 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3136 sub ratio2segdup{
 | 
| 
 | 
  3137     
 | 
| 
 | 
  3138     my($id,$density_file,$segdup_file)=@_;
 | 
| 
 | 
  3139     
 | 
| 
 | 
  3140     print LOG "# Converting to circos format...\n";
 | 
| 
 | 
  3141         
 | 
| 
 | 
  3142     open RATIO, "<$density_file" or die "$0: can't open $density_file :$!\n";
 | 
| 
 | 
  3143     open SEGDUP, ">$segdup_file" or die "$0: can't write in the output: $segdup_file :$!\n";
 | 
| 
 | 
  3144   
 | 
| 
 | 
  3145     while(<RATIO>){
 | 
| 
 | 
  3146 	chomp;
 | 
| 
 | 
  3147 	my ($chr1,$start1,$end1,$ratio)=(split /\t/)[0,1,2,5];	
 | 
| 
 | 
  3148 	print SEGDUP "$id$chr1\t$start1\t$end1\t$ratio\n";
 | 
| 
 | 
  3149     }
 | 
| 
 | 
  3150     
 | 
| 
 | 
  3151     close RATIO;
 | 
| 
 | 
  3152     close SEGDUP;
 | 
| 
 | 
  3153     print LOG "-- output created: $segdup_file\n";
 | 
| 
 | 
  3154 }
 | 
| 
 | 
  3155 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3156 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3157 sub ratio2bedfile{
 | 
| 
 | 
  3158     
 | 
| 
 | 
  3159     my($density_file,$bed_file)=@_;
 | 
| 
 | 
  3160     
 | 
| 
 | 
  3161     print LOG "# Converting to bedGraph format...\n";
 | 
| 
 | 
  3162         
 | 
| 
 | 
  3163     open RATIO, "<$density_file" or die "$0: can't open $density_file :$!\n";
 | 
| 
 | 
  3164     open BED, ">$bed_file" or die "$0: can't write in the output: $bed_file :$!\n";
 | 
| 
 | 
  3165     print BED "track type=bedGraph name=\"$bed_file\" description=\"log ratios for cnv detection\" ".
 | 
| 
 | 
  3166 	      "visibility=2 color=255,0,0 alwaysZero=\"On\"\n";
 | 
| 
 | 
  3167     
 | 
| 
 | 
  3168     while(<RATIO>){
 | 
| 
 | 
  3169 	chomp;
 | 
| 
 | 
  3170 	my ($chr1,$start1,$end1,$ratio)=(split /\t/)[0,1,2,5];
 | 
| 
 | 
  3171 	$chr1 = "chr".$chr1 unless ($chr1 =~ m/chr/);
 | 
| 
 | 
  3172 	print BED "$chr1\t".($start1-1)."\t$end1\t$ratio\n";
 | 
| 
 | 
  3173     }
 | 
| 
 | 
  3174     
 | 
| 
 | 
  3175     close RATIO;
 | 
| 
 | 
  3176     close BED;
 | 
| 
 | 
  3177     print LOG "-- output created: $bed_file\n";
 | 
| 
 | 
  3178 }
 | 
| 
 | 
  3179 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3180 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3181 sub inverseSense{
 | 
| 
 | 
  3182     
 | 
| 
 | 
  3183     my $mate_sense=$_[0];
 | 
| 
 | 
  3184     my %reverse=( 'F'  => 'R' , 'R'  => 'F' ,
 | 
| 
 | 
  3185 		  'FF' => 'RR', 'RR' => 'FF',			
 | 
| 
 | 
  3186 		  'FR' => 'RF', 'RF' => 'FR');
 | 
| 
 | 
  3187     return $reverse{$mate_sense};
 | 
| 
 | 
  3188 }
 | 
| 
 | 
  3189 
 | 
| 
 | 
  3190 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3191 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3192 sub getNextFrag{
 | 
| 
 | 
  3193     
 | 
| 
 | 
  3194     my ($read_start,$frag_num,$frag_start,$frag_last,$window_dist,$step)=@_;
 | 
| 
 | 
  3195     
 | 
| 
 | 
  3196     my $how_far = $read_start-$frag_start;
 | 
| 
 | 
  3197     my $nb_windows_toskip;
 | 
| 
 | 
  3198     
 | 
| 
 | 
  3199     if($how_far>0){
 | 
| 
 | 
  3200 	
 | 
| 
 | 
  3201 	$nb_windows_toskip=($how_far/$step)-($window_dist/$step);
 | 
| 
 | 
  3202 	$nb_windows_toskip=~ s/\..*//;
 | 
| 
 | 
  3203 	$nb_windows_toskip=0 if($nb_windows_toskip<0);
 | 
| 
 | 
  3204 	return ($frag_num + $nb_windows_toskip);
 | 
| 
 | 
  3205     }
 | 
| 
 | 
  3206     return $frag_last;
 | 
| 
 | 
  3207 }
 | 
| 
 | 
  3208 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3209 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3210 sub getColor{
 | 
| 
 | 
  3211 
 | 
| 
 | 
  3212     my($count,$hcolor,$format)=@_;
 | 
| 
 | 
  3213     for my $col ( keys % { $hcolor} ) {
 | 
| 
 | 
  3214        return $col if($count>=$hcolor->{$col}->[0] && $count<=$hcolor->{$col}->[1]);
 | 
| 
 | 
  3215     }
 | 
| 
 | 
  3216     return "white" if($format eq "circos");
 | 
| 
 | 
  3217     return "255,255,255" if($format eq "bed");
 | 
| 
 | 
  3218 }
 | 
| 
 | 
  3219 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3220 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3221 sub recupCoords{
 | 
| 
 | 
  3222     
 | 
| 
 | 
  3223     my($c_hit,$cs_hit,$ce_hit,$tag_length,$input_format)=@_;
 | 
| 
 | 
  3224     my $strand = 'F';
 | 
| 
 | 
  3225     
 | 
| 
 | 
  3226     if ($c_hit=~s/^\-//) {
 | 
| 
 | 
  3227 	$strand='R';
 | 
| 
 | 
  3228 	    $$cs_hit=$c_hit;
 | 
| 
 | 
  3229 	    $$ce_hit=$c_hit-($tag_length-1); 
 | 
| 
 | 
  3230     }else{
 | 
| 
 | 
  3231 	$$cs_hit=$c_hit;
 | 
| 
 | 
  3232 	$$ce_hit=$c_hit+($tag_length-1); 
 | 
| 
 | 
  3233     }
 | 
| 
 | 
  3234     return $strand;
 | 
| 
 | 
  3235     
 | 
| 
 | 
  3236 }
 | 
| 
 | 
  3237 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3238 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3239 sub overlap {
 | 
| 
 | 
  3240     my($cs_hit,$ce_hit,$cs_region,$ce_region)=@_;
 | 
| 
 | 
  3241     if( (($cs_hit < $cs_region) && ($ce_hit < $cs_region )) || (($cs_hit > $ce_region) && ($ce_hit > $ce_region )) ) {
 | 
| 
 | 
  3242         return 0;
 | 
| 
 | 
  3243     }
 | 
| 
 | 
  3244     return 1;
 | 
| 
 | 
  3245 }
 | 
| 
 | 
  3246 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3247 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3248 sub makeLink {
 | 
| 
 | 
  3249     
 | 
| 
 | 
  3250     my ($link,$chr1,$frag1,$chr2,$frag2,$mt,$nb)=@_;
 | 
| 
 | 
  3251 
 | 
| 
 | 
  3252     if($chr1>$chr2){
 | 
| 
 | 
  3253         ($chr1,$chr2)= ($chr2,$chr1);
 | 
| 
 | 
  3254         ($frag1,$frag2)= ($frag2,$frag1);
 | 
| 
 | 
  3255     }
 | 
| 
 | 
  3256     
 | 
| 
 | 
  3257      if($chr1 == $chr2){
 | 
| 
 | 
  3258         if($frag1>$frag2){
 | 
| 
 | 
  3259 	    ($frag1,$frag2)= ($frag2,$frag1);
 | 
| 
 | 
  3260 	}
 | 
| 
 | 
  3261     }
 | 
| 
 | 
  3262   
 | 
| 
 | 
  3263     if(!exists $link->{$chr1}->{$chr2}->{$frag1}->{$frag2}){
 | 
| 
 | 
  3264         $link->{$chr1}->{$chr2}->{$frag1}->{$frag2}=$mt;
 | 
| 
 | 
  3265 	$$nb++;
 | 
| 
 | 
  3266     }elsif($link->{$chr1}->{$chr2}->{$frag1}->{$frag2}!~/(^|,)$mt(,|$)/){
 | 
| 
 | 
  3267         $link->{$chr1}->{$chr2}->{$frag1}->{$frag2}.=",$mt";
 | 
| 
 | 
  3268     }
 | 
| 
 | 
  3269 }
 | 
| 
 | 
  3270 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3271 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3272 #fonction of adding the read to the density profile
 | 
| 
 | 
  3273 sub addToDensity {
 | 
| 
 | 
  3274     
 | 
| 
 | 
  3275     my ($density,$chr1,$frag1,$nb)=@_;
 | 
| 
 | 
  3276     
 | 
| 
 | 
  3277     if(!exists $density->{$chr1}->{$frag1}->{count}){
 | 
| 
 | 
  3278             $density->{$chr1}->{$frag1}->{count}=1;
 | 
| 
 | 
  3279 	    $$nb++;
 | 
| 
 | 
  3280     }else{
 | 
| 
 | 
  3281             $density->{$chr1}->{$frag1}->{count}++;
 | 
| 
 | 
  3282     }
 | 
| 
 | 
  3283 }
 | 
| 
 | 
  3284 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3285 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3286 sub floor {
 | 
| 
 | 
  3287     my $nb = $_[0];
 | 
| 
 | 
  3288     $nb=~ s/\..*//;
 | 
| 
 | 
  3289     return $nb;
 | 
| 
 | 
  3290 }
 | 
| 
 | 
  3291 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3292 sub decimal{
 | 
| 
 | 
  3293     
 | 
| 
 | 
  3294   my $num=shift;
 | 
| 
 | 
  3295   my $digs_to_cut=shift;
 | 
| 
 | 
  3296 
 | 
| 
 | 
  3297   $num=sprintf("%.".($digs_to_cut-1)."f", $num) if ($num=~/\d+\.(\d){$digs_to_cut,}/);
 | 
| 
 | 
  3298 
 | 
| 
 | 
  3299   return $num;
 | 
| 
 | 
  3300 }
 | 
| 
 | 
  3301 
 | 
| 
 | 
  3302 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3303 sub max {
 | 
| 
 | 
  3304     
 | 
| 
 | 
  3305     my($max) = shift(@_);
 | 
| 
 | 
  3306     foreach my $temp (@_) {
 | 
| 
 | 
  3307         $max = $temp if $temp > $max;
 | 
| 
 | 
  3308     }
 | 
| 
 | 
  3309     return($max);
 | 
| 
 | 
  3310 }
 | 
| 
 | 
  3311 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3312 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3313 sub min {
 | 
| 
 | 
  3314     
 | 
| 
 | 
  3315     my($min) = shift(@_);
 | 
| 
 | 
  3316     foreach my $temp (@_) {
 | 
| 
 | 
  3317         $min = $temp if $temp < $min;
 | 
| 
 | 
  3318     }
 | 
| 
 | 
  3319     return($min);
 | 
| 
 | 
  3320 }
 | 
| 
 | 
  3321 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3322 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3323 sub sortTablebyIndex{
 | 
| 
 | 
  3324     my ($tab1,$tab2)=@_;
 | 
| 
 | 
  3325     my @tab3;
 | 
| 
 | 
  3326     
 | 
| 
 | 
  3327     foreach my $i (@$tab1){
 | 
| 
 | 
  3328 	$tab3[$i]=$$tab2[$$tab1[$i]];
 | 
| 
 | 
  3329     }
 | 
| 
 | 
  3330     return @tab3;
 | 
| 
 | 
  3331 }
 | 
| 
 | 
  3332 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3333 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3334 sub round {
 | 
| 
 | 
  3335   my $number = shift || 0;
 | 
| 
 | 
  3336   my $dec = 10 ** (shift || 0);
 | 
| 
 | 
  3337   return int( $dec * $number + .5 * ($number <=> 0)) / $dec;
 | 
| 
 | 
  3338 }
 | 
| 
 | 
  3339 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3340 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3341 sub getUniqueTable{
 | 
| 
 | 
  3342     
 | 
| 
 | 
  3343     my (@tab)=@_;
 | 
| 
 | 
  3344     my (%saw,@out)=();
 | 
| 
 | 
  3345     undef %saw;
 | 
| 
 | 
  3346     return sort(grep(!$saw{$_}++, @tab)); 
 | 
| 
 | 
  3347 }
 | 
| 
 | 
  3348 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3349 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3350 sub catFiles {
 | 
| 
 | 
  3351     
 | 
| 
 | 
  3352     unlink("$_[1]") if(exists $_[1]);
 | 
| 
 | 
  3353     system qq( cat "$_" >> "$_[1]" ) for @{$_[0]};
 | 
| 
 | 
  3354 }
 | 
| 
 | 
  3355 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3356 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3357 #check if the configuration file is correct
 | 
| 
 | 
  3358 sub validateconfiguration{
 | 
| 
 | 
  3359     
 | 
| 
 | 
  3360     my %conf=%{$_[0]};
 | 
| 
 | 
  3361     my $list_prgs="@ARGV";
 | 
| 
 | 
  3362     
 | 
| 
 | 
  3363     my @general_params=qw(input_format mates_orientation read1_length read2_length mates_file cmap_file);
 | 
| 
 | 
  3364     my @detection_params=qw(split_mate_file window_size step_length split_mate_file);
 | 
| 
 | 
  3365     my @filtering_params=qw(split_link_file nb_pairs_threshold strand_filtering split_link_file);
 | 
| 
 | 
  3366     my @circos_params=qw(organism_id colorcode);
 | 
| 
 | 
  3367     my @bed_params=qw(colorcode);
 | 
| 
 | 
  3368     my @compare_params=qw(list_samples file_suffix);
 | 
| 
 | 
  3369     
 | 
| 
 | 
  3370     foreach my $dir ($conf{general}{output_dir},$conf{general}{tmp_dir}){
 | 
| 
 | 
  3371 	
 | 
| 
 | 
  3372 	unless (defined($dir)) {
 | 
| 
 | 
  3373 	    $dir = ".";
 | 
| 
 | 
  3374 	}
 | 
| 
 | 
  3375 	unless (-d $dir){
 | 
| 
 | 
  3376 	    mkdir $dir or die;
 | 
| 
 | 
  3377 	}
 | 
| 
 | 
  3378 	$dir.="/" if($dir!~/\/$/);
 | 
| 
 | 
  3379     }
 | 
| 
 | 
  3380     
 | 
| 
 | 
  3381     unless (defined($conf{general}{num_threads})) {
 | 
| 
 | 
  3382 	    $conf{general}{num_threads} = 1;
 | 
| 
 | 
  3383 	}
 | 
| 
 | 
  3384     $conf{general}{num_threads}=24 if($conf{general}{num_threads}>24);
 | 
| 
 | 
  3385     
 | 
| 
 | 
  3386     if($list_prgs!~/links2compare/){
 | 
| 
 | 
  3387     
 | 
| 
 | 
  3388 	foreach my $p (@general_params){
 | 
| 
 | 
  3389 	    die("Error Config : The parameter \"$p\" is not defined\n") if (!defined $conf{general}{$p});
 | 
| 
 | 
  3390 	}
 | 
| 
 | 
  3391 	
 | 
| 
 | 
  3392 	$conf{general}{input_format}="sam" if($conf{general}{input_format} eq "bam");
 | 
| 
 | 
  3393 	
 | 
| 
 | 
  3394 	unless (defined($conf{general}{sv_type})) {
 | 
| 
 | 
  3395 	    $conf{general}{sv_type} = "all";
 | 
| 
 | 
  3396 	}
 | 
| 
 | 
  3397 	
 | 
| 
 | 
  3398 	$conf{general}{read_lengths}={ 1=> $conf{general}{read1_length}, 2=> $conf{general}{read2_length}};
 | 
| 
 | 
  3399     }
 | 
| 
 | 
  3400     
 | 
| 
 | 
  3401     if($list_prgs=~/(linking|cnv)/){
 | 
| 
 | 
  3402 	
 | 
| 
 | 
  3403 	foreach my $p (@detection_params){
 | 
| 
 | 
  3404 	    die("Error Config : The parameter \"$p\" is not defined\n") if (!defined $conf{detection}{$p});
 | 
| 
 | 
  3405 	}
 | 
| 
 | 
  3406 	
 | 
| 
 | 
  3407 	die("Error Config : The parameter \"mates_file_ref\" is not defined\n") if($list_prgs=~/cnv/ && !defined $conf{detection}{mates_file_ref});
 | 
| 
 | 
  3408 	
 | 
| 
 | 
  3409 	if($conf{detection}{step_length}>$conf{detection}{window_size}){
 | 
| 
 | 
  3410 	    die("Error Config : Parameter \"step_length\" should not exceed \"window size\"\n");
 | 
| 
 | 
  3411 	}
 | 
| 
 | 
  3412 	
 | 
| 
 | 
  3413 	unless (-d $conf{general}{tmp_dir}."/mates"){
 | 
| 
 | 
  3414 	    mkdir $conf{general}{tmp_dir}."/mates" or die;
 | 
| 
 | 
  3415 	}
 | 
| 
 | 
  3416 	
 | 
| 
 | 
  3417 	if($list_prgs=~/linking/){
 | 
| 
 | 
  3418 	    unless (-d $conf{general}{tmp_dir}."/links"){
 | 
| 
 | 
  3419 		mkdir $conf{general}{tmp_dir}."/links" or die;
 | 
| 
 | 
  3420 	    }
 | 
| 
 | 
  3421 	}
 | 
| 
 | 
  3422 	if($list_prgs=~/cnv/){
 | 
| 
 | 
  3423 	    unless (-d $conf{general}{tmp_dir}."/density"){
 | 
| 
 | 
  3424 		mkdir $conf{general}{tmp_dir}."/density" or die;
 | 
| 
 | 
  3425 	    }
 | 
| 
 | 
  3426 	}
 | 
| 
 | 
  3427 	
 | 
| 
 | 
  3428     }
 | 
| 
 | 
  3429     
 | 
| 
 | 
  3430     if($list_prgs=~/filtering/){
 | 
| 
 | 
  3431     
 | 
| 
 | 
  3432 	foreach my $p (@filtering_params) {
 | 
| 
 | 
  3433 	    die("Error Config : The filtering parameter \"$p\" is not defined\n") if (!defined $conf{filtering}{$p});
 | 
| 
 | 
  3434 	    
 | 
| 
 | 
  3435 	}
 | 
| 
 | 
  3436 	
 | 
| 
 | 
  3437 	if(defined($conf{filtering}{chromosomes})) {
 | 
| 
 | 
  3438 	    my @chrs=split(",",$conf{filtering}{chromosomes});
 | 
| 
 | 
  3439 	    my $exclude=($chrs[0]=~/^\-/)? 1:0;
 | 
| 
 | 
  3440 	    for my $chrName (@chrs){
 | 
| 
 | 
  3441 	      
 | 
| 
 | 
  3442 		die("Error Config : The filtering parameter \"chromosomes\" is not valid\n")
 | 
| 
 | 
  3443 		if(($chrName!~/^\-/ && $exclude) || ($chrName=~/^\-/ && !$exclude));
 | 
| 
 | 
  3444 		
 | 
| 
 | 
  3445 	    }
 | 
| 
 | 
  3446 	}
 | 
| 
 | 
  3447 	
 | 
| 
 | 
  3448 	if (( $conf{filtering}{order_filtering} )&& !$conf{filtering}{strand_filtering}) {
 | 
| 
 | 
  3449 	    die("Error Config : The parameter strand_filtering is set to \"0\" while order_filtering is selected".
 | 
| 
 | 
  3450 		"\nChange strand_filtering to \"1\" if you want to use the order filtering\n");
 | 
| 
 | 
  3451 	}
 | 
| 
 | 
  3452 	if (( !defined($conf{filtering}{mu_length}) || !defined($conf{filtering}{sigma_length}) )&& $conf{filtering}{order_filtering}) {
 | 
| 
 | 
  3453 	    die("Error Config : You should set parameters \"mu_length\" and \"sigma_length\" to use order filtering\n");
 | 
| 
 | 
  3454 	}
 | 
| 
 | 
  3455 	if (( $conf{filtering}{insert_size_filtering} )&& !$conf{filtering}{strand_filtering}) {
 | 
| 
 | 
  3456 	    die("Error Config : The parameter strand_filtering is set to \"0\" while insert_size_filtering is selected".
 | 
| 
 | 
  3457 		"\nChange strand_filtering to \"1\" if you want to use the insert size filtering\n");
 | 
| 
 | 
  3458 	}
 | 
| 
 | 
  3459 	if (( !defined($conf{filtering}{mu_length}) || !defined($conf{filtering}{sigma_length}) )&& $conf{filtering}{insert_size_filtering}) {
 | 
| 
 | 
  3460 	    die("Error Config : You should set parameters \"mu_length\" and \"sigma_length\" to use discriminate insertions from deletions\n");
 | 
| 
 | 
  3461 	}
 | 
| 
 | 
  3462 	
 | 
| 
 | 
  3463 	if (!defined($conf{filtering}{indel_sigma_threshold})) {
 | 
| 
 | 
  3464 	    $conf{filtering}{indel_sigma_threshold} = 2;
 | 
| 
 | 
  3465 	}
 | 
| 
 | 
  3466 	if (!defined($conf{filtering}{dup_sigma_threshold})) {
 | 
| 
 | 
  3467 	    $conf{filtering}{dup_sigma_threshold} = 2;
 | 
| 
 | 
  3468 	}
 | 
| 
 | 
  3469 	if (!defined($conf{filtering}{singleton_sigma_threshold})) {
 | 
| 
 | 
  3470 	    $conf{filtering}{singleton_sigma_threshold} = 4;
 | 
| 
 | 
  3471 	}
 | 
| 
 | 
  3472 	
 | 
| 
 | 
  3473 	if (!defined($conf{filtering}{nb_pairs_order_threshold})) {
 | 
| 
 | 
  3474 	    $conf{filtering}{nb_pairs_order_threshold} = 1;
 | 
| 
 | 
  3475 	}
 | 
| 
 | 
  3476 	
 | 
| 
 | 
  3477 	if (!defined($conf{filtering}{final_score_threshold})) {
 | 
| 
 | 
  3478 	    $conf{filtering}{final_score_threshold} = 0.8;
 | 
| 
 | 
  3479 	}
 | 
| 
 | 
  3480 	
 | 
| 
 | 
  3481 	if ($conf{filtering}{nb_pairs_order_threshold}>$conf{filtering}{nb_pairs_threshold}) {
 | 
| 
 | 
  3482 	    die("Error Config : Parameter \"nb_pairs_order_threshold\" should not exceed \"nb_pairs_threshold\"\n");
 | 
| 
 | 
  3483 	}
 | 
| 
 | 
  3484 	
 | 
| 
 | 
  3485     }
 | 
| 
 | 
  3486     
 | 
| 
 | 
  3487     if($list_prgs=~/2circos$/){
 | 
| 
 | 
  3488 	foreach my $p (@circos_params) {
 | 
| 
 | 
  3489 	    next if($list_prgs=~/^ratio/ && $p eq "colorcode");
 | 
| 
 | 
  3490 	    die("Error Config : The circos parameter \"$p\" is not defined\n") if (!defined $conf{circos}{$p});
 | 
| 
 | 
  3491 	}
 | 
| 
 | 
  3492     }
 | 
| 
 | 
  3493     
 | 
| 
 | 
  3494     if($list_prgs=~/2bed$/){
 | 
| 
 | 
  3495 	foreach my $p (@bed_params) {
 | 
| 
 | 
  3496 	    die("Error Config : The bed parameter \"$p\" is not defined\n") if (!defined $conf{bed}{$p});
 | 
| 
 | 
  3497 	}
 | 
| 
 | 
  3498     }
 | 
| 
 | 
  3499     
 | 
| 
 | 
  3500     if($list_prgs=~/links2compare/){
 | 
| 
 | 
  3501 	foreach my $p (@compare_params) {
 | 
| 
 | 
  3502 	    die("Error Config : The compare parameter \"$p\" is not defined\n") if (!defined $conf{compare}{$p});
 | 
| 
 | 
  3503 	}
 | 
| 
 | 
  3504 	
 | 
| 
 | 
  3505 	unless (defined($conf{compare}{same_sv_type})) {
 | 
| 
 | 
  3506 	    $conf{compare}{same_sv_type} = 0;
 | 
| 
 | 
  3507 	}
 | 
| 
 | 
  3508 	
 | 
| 
 | 
  3509 	unless (defined($conf{compare}{min_overlap})) {
 | 
| 
 | 
  3510 	    $conf{compare}{min_overlap} = 1E-9;
 | 
| 
 | 
  3511 	}
 | 
| 
 | 
  3512 	
 | 
| 
 | 
  3513 	if($conf{compare}{circos_output}){
 | 
| 
 | 
  3514 	    foreach my $p (@circos_params) {
 | 
| 
 | 
  3515 		next if($list_prgs=~/^ratio/ && $p eq "colorcode");
 | 
| 
 | 
  3516 		die("Error Config : The circos parameter \"$p\" is not defined\n") if (!defined $conf{circos}{$p});
 | 
| 
 | 
  3517 	    }
 | 
| 
 | 
  3518 	}
 | 
| 
 | 
  3519 	if($conf{compare}{bed_output}){
 | 
| 
 | 
  3520 	    foreach my $p (@bed_params) {
 | 
| 
 | 
  3521 		die("Error Config : The bed parameter \"$p\" is not defined\n") if (!defined $conf{bed}{$p});
 | 
| 
 | 
  3522 	    }
 | 
| 
 | 
  3523 	    die("Error Config : The compare parameter \"list_read_lengths\" is not defined\n") if (!defined $conf{compare}{list_read_lengths});
 | 
| 
 | 
  3524 
 | 
| 
 | 
  3525 	    my @samples=split(",",$conf{compare}{list_samples});
 | 
| 
 | 
  3526 	    my @read_lengths=split(",",$conf{compare}{list_read_lengths});
 | 
| 
 | 
  3527 	    for my $i (0..$#samples){
 | 
| 
 | 
  3528 		my @l=split("-",$read_lengths[$i]);
 | 
| 
 | 
  3529 		$conf{compare}{read_lengths}{$samples[$i]}={ 1=> $l[0], 2=> $l[1]};
 | 
| 
 | 
  3530 	    }
 | 
| 
 | 
  3531 	}
 | 
| 
 | 
  3532     }
 | 
| 
 | 
  3533    
 | 
| 
 | 
  3534     
 | 
| 
 | 
  3535 }
 | 
| 
 | 
  3536 #------------------------------------------------------------------------------#
 | 
| 
 | 
  3537 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::#
 |