| 3 | 1 #!/usr/bin/perl -w | 
|  | 2 | 
|  | 3 =pod | 
|  | 4 | 
|  | 5 =head1 NAME | 
|  | 6 | 
|  | 7 SVDetect Compare for Galaxy | 
|  | 8 | 
|  | 9 Version: 0.8b for Galaxy | 
|  | 10 | 
|  | 11 =head1 SYNOPSIS | 
|  | 12 | 
|  | 13 SVDetect_compare.pl links2compare -conf <configuration_file> [-help] [-man] | 
|  | 14 | 
|  | 15 =cut | 
|  | 16 | 
|  | 17 # ------------------------------------------------------------------- | 
|  | 18 | 
|  | 19 use strict; | 
|  | 20 use warnings; | 
|  | 21 | 
|  | 22 use Pod::Usage; | 
|  | 23 use Getopt::Long; | 
|  | 24 | 
|  | 25 use Config::General; | 
|  | 26 use Tie::IxHash; | 
|  | 27 | 
|  | 28 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::# | 
|  | 29 #PARSE THE COMMAND LINE | 
|  | 30 my %OPT; | 
|  | 31 GetOptions(\%OPT, | 
|  | 32 	   'conf=s', | 
|  | 33 	   'out1=s', #GALAXY | 
|  | 34 	   'out2=s', #GALAXY | 
|  | 35 	   'out3=s', #GALAXY | 
|  | 36 	   'out4=s', #GALAXY | 
|  | 37 	   'out5=s', #GALAXY | 
|  | 38 	   'out6=s', #GALAXY | 
|  | 39 	   'out7=s', #GALAXY | 
|  | 40 	   'out8=s', #GALAXY | 
|  | 41 	   'out9=s', #GALAXY | 
|  | 42 	   'l=s', #GALAXY | 
|  | 43 	   'N=s', #GALAXY | 
|  | 44 	   'help', | 
|  | 45            'man' | 
|  | 46 	  ); | 
|  | 47 | 
|  | 48 pod2usage() if $OPT{help}; | 
|  | 49 pod2usage(-verbose=>2) if $OPT{man}; | 
|  | 50 pod2usage(-message=> "$!", -exitval => 2) if (!defined $OPT{conf}); | 
|  | 51 | 
|  | 52 | 
|  | 53 pod2usage() if(@ARGV<1); | 
|  | 54 | 
|  | 55 tie (my %func, 'Tie::IxHash',links2compare=>\&links2compare); | 
|  | 56 | 
|  | 57 foreach my $command (@ARGV){ | 
|  | 58     pod2usage(-message=> "Unknown command \"$command\"", -exitval => 2) if (!defined($func{$command})); | 
|  | 59 } | 
|  | 60 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::# | 
|  | 61 | 
|  | 62 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::# | 
|  | 63 #READ THE CONFIGURATION FILE | 
|  | 64 my $conf=Config::General->new(    -ConfigFile        => $OPT{conf}, | 
|  | 65                                   -Tie => "Tie::IxHash", | 
|  | 66                                   -AllowMultiOptions => 1, | 
|  | 67 				  -LowerCaseNames    => 1, | 
|  | 68 				  -AutoTrue => 1); | 
|  | 69 my %CONF= $conf->getall; | 
|  | 70 validateconfiguration(\%CONF);							#validation of the configuration parameters | 
|  | 71 | 
|  | 72 | 
|  | 73 my $SAMTOOLS_BIN_DIR="/bioinfo/local/samtools"; #GALAXY | 
|  | 74 my $BEDTOOLS_BIN_DIR="/bioinfo/local/BEDTools/bin"; #GALAXY | 
|  | 75 | 
|  | 76 my $pt_log_file=$OPT{l}; #GALAXY | 
|  | 77 my $log_file=$CONF{general}{output_dir}.$OPT{N}.".svdetect_compare.log"; #GALAXY | 
|  | 78 open LOG,">$log_file" or die "$0: can't open ".$log_file.":$!\n";#GALAXY | 
|  | 79 | 
|  | 80 my @pt_sv_file=($OPT{out1},$OPT{out2},$OPT{out3}) if($OPT{out1}); #GALAXY common,sample,reference | 
|  | 81 my @pt_circos_file=($OPT{out4},$OPT{out5},$OPT{out6}) if($OPT{out4}); #GALAXY common,sample,reference | 
|  | 82 my @pt_bed_file=($OPT{out7},$OPT{out8},$OPT{out9}) if($OPT{out7}); #GALAXY common,sample,reference | 
|  | 83 | 
|  | 84 $CONF{compare}{sample_link_file}=readlink($CONF{compare}{sample_link_file});#GALAXY | 
|  | 85 $CONF{compare}{sample_link_file}=~s/.sv.txt//; #GALAXY | 
|  | 86 | 
|  | 87 $CONF{compare}{reference_link_file}=readlink($CONF{compare}{reference_link_file});#GALAXY | 
|  | 88 $CONF{compare}{reference_link_file}=~s/.sv.txt//; #GALAXY | 
|  | 89 | 
|  | 90 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::# | 
|  | 91 | 
|  | 92 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::# | 
|  | 93 #COMMAND EXECUTION | 
|  | 94 foreach my $command (@ARGV){ | 
|  | 95     &{$func{$command}}(); | 
|  | 96 } | 
|  | 97 print LOG "-- end\n"; | 
|  | 98 | 
|  | 99 close LOG;#GALAXY | 
|  | 100 system "rm $pt_log_file ; ln -s $log_file $pt_log_file"; #GALAXY | 
|  | 101 | 
|  | 102 exit(0); | 
|  | 103 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::# | 
|  | 104 | 
|  | 105 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::# | 
|  | 106 #FUNCTIONS | 
|  | 107 | 
|  | 108 # -----------------------------------------------------------------------------# | 
|  | 109 #MAIN FUNCTION number 5:Comparison between samples, common or specific links | 
|  | 110 sub links2compare{ | 
|  | 111 | 
|  | 112     my @compare_files; | 
|  | 113 | 
|  | 114     compareSamples($CONF{general}{output_dir}, | 
|  | 115 		   $CONF{compare}{list_samples}, | 
|  | 116 		   $CONF{compare}{sample_link_file}, | 
|  | 117 		   $CONF{compare}{reference_link_file}, | 
|  | 118 		   $CONF{compare}{min_overlap}, | 
|  | 119 		   $CONF{compare}{same_sv_type}, | 
|  | 120 		   \@compare_files); | 
|  | 121 | 
|  | 122     my $pt_ind=0; | 
|  | 123 | 
|  | 124     for my $input_file (@compare_files){ | 
|  | 125 | 
|  | 126 	$input_file=$CONF{general}{output_dir}.$input_file; | 
|  | 127 | 
|  | 128 	my $output_file=$input_file; | 
|  | 129 	$output_file=~s/unique$/compared/; | 
|  | 130 | 
|  | 131 	sortLinks($input_file, $output_file,1); | 
|  | 132 | 
|  | 133 	if($CONF{compare}{circos_output}){ | 
|  | 134 	    links2segdup($CONF{circos}{organism_id}, | 
|  | 135 			 $CONF{circos}{colorcode}, | 
|  | 136 			 $output_file, | 
|  | 137 			 $output_file.".segdup.txt"); | 
|  | 138 	    system "rm $pt_circos_file[$pt_ind]; ln -s $output_file.segdup.txt $pt_circos_file[$pt_ind]" if (defined $pt_circos_file[$pt_ind]); #GALAXY | 
|  | 139 	} | 
|  | 140 | 
|  | 141 	if($CONF{compare}{bed_output}){ | 
|  | 142 	links2bedfile($CONF{compare}{read_lengths}, | 
|  | 143 		      $CONF{bed}{colorcode}, | 
|  | 144 		      $output_file, | 
|  | 145 		      $output_file.".bed"); | 
|  | 146 	system "rm $pt_bed_file[$pt_ind]; ln -s $output_file.bed $pt_bed_file[$pt_ind]" if (defined $pt_bed_file[$pt_ind]); #GALAXY | 
|  | 147 	} | 
|  | 148 | 
|  | 149 	if($CONF{compare}{sv_output}){ | 
|  | 150 | 
|  | 151 	    links2SVfile ($output_file, $output_file.".sv.txt"); | 
|  | 152 	    system "rm $pt_sv_file[$pt_ind]; ln -s $output_file.sv.txt $pt_sv_file[$pt_ind]" if (defined $pt_sv_file[$pt_ind]); #GALAXY | 
|  | 153 	} | 
|  | 154 	$pt_ind++; | 
|  | 155 | 
|  | 156     } | 
|  | 157     unlink(@compare_files); | 
|  | 158 | 
|  | 159 } | 
|  | 160 #------------------------------------------------------------------------------# | 
|  | 161 #------------------------------------------------------------------------------# | 
|  | 162 sub compareSamples{ | 
|  | 163 | 
|  | 164     my ($dir,$list_samples,$sample_file,$reference_file,$min_overlap,$same_sv_type,$file_names)=@_; | 
|  | 165 | 
|  | 166     my @bedpefiles; | 
|  | 167     my @list=split(",",$list_samples); | 
|  | 168     my @list_files=($sample_file,$reference_file); | 
|  | 169 | 
|  | 170     print LOG "\# Comparison procedure...\n"; | 
|  | 171     print LOG "-- samples=$list_samples\n". | 
|  | 172 	 "-- minimum overlap=$min_overlap\n". | 
|  | 173 	 "-- same SV type=$same_sv_type\n"; | 
|  | 174 | 
|  | 175     #conversion of links to bedPE format file | 
|  | 176     print LOG "-- Conversion of links.filtered files to bedPE format\n"; | 
|  | 177     for my $s (0..$#list) { | 
|  | 178 | 
|  | 179 	links2bedPElinksfile($list[$s],$list_files[$s],$list_files[$s].".bedpe.txt"); | 
|  | 180 	push(@bedpefiles,$list_files[$s].".bedpe.txt"); | 
|  | 181 | 
|  | 182     } | 
|  | 183 | 
|  | 184     #get common links between all samples compared | 
|  | 185     print LOG "-- Getting common links between all samples with BEDTools\n"; | 
|  | 186     my $common_name=join(".",@list); | 
|  | 187 | 
|  | 188     my $nb=scalar @list; | 
|  | 189     my $command=""; | 
|  | 190     my $prompt=">"; | 
|  | 191 | 
|  | 192     while ($nb>0){ | 
|  | 193 | 
|  | 194 	for my $i (0..$#list_files){ | 
|  | 195 | 
|  | 196 	    $command.="$BEDTOOLS_BIN_DIR/pairToPair -type both -f $min_overlap -a ".$list_files[$i].".bedpe.txt"; | 
|  | 197 	    my $pipe=0; | 
|  | 198 | 
|  | 199 	    for my $j ($i+1..$#list_files){ | 
|  | 200 | 
|  | 201 		$command.="| $BEDTOOLS_BIN_DIR/pairToPair -type both -f $min_overlap -a stdin" if($pipe); | 
|  | 202 		$command.=" -b ".$list_files[$j].".bedpe.txt"; | 
|  | 203 		$pipe=1; | 
|  | 204 | 
|  | 205 	    } | 
|  | 206 | 
|  | 207 	    $command.=$prompt.$dir.$common_name.".bedpe.tmp;"; | 
|  | 208 	    $prompt=">>"; | 
|  | 209 | 
|  | 210 	    my $first=shift(@list_files); push(@list_files,$first); | 
|  | 211 	    last; | 
|  | 212 	} | 
|  | 213 	$nb--; | 
|  | 214     } | 
|  | 215 | 
|  | 216     system ($command); | 
|  | 217 | 
|  | 218     push(@bedpefiles,$dir.$common_name.".bedpe.tmp"); | 
|  | 219 | 
|  | 220     #Post comparison to get common links if same type only (as an option) | 
|  | 221     open( FILE, "<".$dir.$common_name.".bedpe.tmp") or die "Can't open".$dir.$common_name.".bedpe.tmp : $!"; | 
|  | 222     open( OUT, ">".$dir.$common_name.".bedpe.unique") or die "Can't write in ".$dir.$common_name.".bedpe.unique : $!"; | 
|  | 223 | 
|  | 224     while(<FILE>){ | 
|  | 225 	my @t=split("\t",$_); | 
|  | 226 	my $s=(split("_",$t[6]))[0]; | 
|  | 227 	my ($sv1,$sv2)=($t[7],$t[18]); | 
|  | 228 	splice(@t,11,$#t); | 
|  | 229 | 
|  | 230 	if($same_sv_type){ | 
|  | 231 	    print OUT join("\t",@t)."\n" if($sv1 eq $sv2); | 
|  | 232 	}else{ | 
|  | 233 	    print OUT join("\t",@t)."\n"; | 
|  | 234 	} | 
|  | 235     } | 
|  | 236     close FILE; | 
|  | 237     close OUT; | 
|  | 238 | 
|  | 239     bedPElinks2linksfile($dir.$common_name.".bedpe.unique", $dir.$common_name.".unique"); | 
|  | 240     push(@bedpefiles,$dir.$common_name.".bedpe.unique"); | 
|  | 241     push(@$file_names,$common_name.".unique"); | 
|  | 242     print LOG "-- output created: ".$dir.$common_name.".compared\n"; | 
|  | 243 | 
|  | 244     #get specific links for each sample | 
|  | 245     print LOG "-- Getting specific links for each sample\n"; | 
|  | 246     for my $s (0..$#list) { | 
|  | 247 	system("grep -Fxv -f ".$dir.$common_name.".bedpe.unique ".$list_files[$s].".bedpe.txt >".$dir.$list[$s].".bedpe.unique"); | 
|  | 248 	bedPElinks2linksfile($dir.$list[$s].".bedpe.unique",$dir.$list[$s].".unique"); | 
|  | 249 	push(@bedpefiles,$dir.$list[$s].".bedpe.unique"); | 
|  | 250 	push(@$file_names,$list[$s].".unique"); | 
|  | 251 	print LOG "-- output created: ".$dir.$list[$s].".compared\n"; | 
|  | 252     } | 
|  | 253 | 
|  | 254     unlink(@bedpefiles); | 
|  | 255 | 
|  | 256 } | 
|  | 257 #------------------------------------------------------------------------------# | 
|  | 258 #------------------------------------------------------------------------------# | 
|  | 259 #convert the links file to the circos format | 
|  | 260 sub links2segdup{ | 
|  | 261 | 
|  | 262     my($id,$color_code,$links_file,$segdup_file)=@_; | 
|  | 263 | 
|  | 264     print LOG "\# Converting to the circos format...\n"; | 
|  | 265 | 
|  | 266     tie (my %hcolor,'Tie::IxHash');						#color-code hash table | 
|  | 267     foreach my $col (keys %{$color_code}){ | 
|  | 268 	my ($min_links,$max_links)=split(",",$color_code->{$col}); | 
|  | 269 	$hcolor{$col}=[$min_links,$max_links]; | 
|  | 270     } | 
|  | 271 | 
|  | 272     open LINKS, "<$links_file" or die "$0: can't open $links_file :$!\n"; | 
|  | 273     open SEGDUP, ">$segdup_file" or die "$0: can't write in the output: $segdup_file :$!\n"; | 
|  | 274 | 
|  | 275     my $index=1; | 
|  | 276     while(<LINKS>){ | 
|  | 277 | 
|  | 278 	my ($chr1,$start1,$end1,$chr2,$start2,$end2,$count)=(split)[0,1,2,3,4,5,6]; | 
|  | 279 | 
|  | 280 	my $color=getColor($count,\%hcolor,"circos");				#get the color-code according the number of links | 
|  | 281 | 
|  | 282 	print SEGDUP "$index\t$id$chr1\t$start1\t$end1\tcolor=$color\n".	#circos output | 
|  | 283 		     "$index\t$id$chr2\t$start2\t$end2\tcolor=$color\n"; | 
|  | 284 	$index++; | 
|  | 285     } | 
|  | 286 | 
|  | 287     close LINKS; | 
|  | 288     close SEGDUP; | 
|  | 289     print LOG "-- output created: $segdup_file\n"; | 
|  | 290 } | 
|  | 291 #------------------------------------------------------------------------------# | 
|  | 292 #------------------------------------------------------------------------------# | 
|  | 293 #convert the links file to the bedPE format for BEDTools usage | 
|  | 294 sub links2bedPElinksfile{ | 
|  | 295 | 
|  | 296     my ($sample,$links_file,$bedpe_file)=@_; | 
|  | 297 | 
|  | 298     open LINKS, "<$links_file" or die "$0: can't open $links_file :$!\n"; | 
|  | 299     open BEDPE, ">$bedpe_file" or die "$0: can't write in the output: $bedpe_file :$!\n"; | 
|  | 300 | 
|  | 301     my $nb_links=1; | 
|  | 302 | 
|  | 303     while(<LINKS>){ | 
|  | 304 | 
|  | 305 	chomp; | 
|  | 306 	my @t=split("\t",$_); | 
|  | 307 	my ($chr1,$start1,$end1,$chr2,$start2,$end2)=splice(@t,0,6); | 
|  | 308 	my $type=($chr1 eq $chr2)? "INTRA":"INTER"; | 
|  | 309 	$type.="_".$t[10]; | 
|  | 310 | 
|  | 311 	$start1--; $start2--; | 
|  | 312 | 
|  | 313 	print BEDPE "$chr1\t$start1\t$end1\t$chr2\t$start2\t$end2". | 
|  | 314 	"\t$sample"."_link$nb_links\t$type\t.\t.". | 
|  | 315 	"\t".join("|",@t)."\n"; | 
|  | 316 | 
|  | 317 	$nb_links++; | 
|  | 318     } | 
|  | 319 | 
|  | 320     close LINKS; | 
|  | 321     close BEDPE; | 
|  | 322 | 
|  | 323 } | 
|  | 324 #------------------------------------------------------------------------------# | 
|  | 325 #------------------------------------------------------------------------------# | 
|  | 326 sub bedPElinks2linksfile{ | 
|  | 327 | 
|  | 328     my ($bedpe_file,$links_file)=@_; | 
|  | 329 | 
|  | 330     open BEDPE, "<$bedpe_file" or die "$0: can't open: $bedpe_file :$!\n"; | 
|  | 331     open LINKS, ">$links_file" or die "$0: can't write in the output $links_file :$!\n"; | 
|  | 332 | 
|  | 333     while(<BEDPE>){ | 
|  | 334 | 
|  | 335 	chomp; | 
|  | 336 	my $sample=(split("_",(split("\t",$_))[6]))[0]; | 
|  | 337 	my @t1=(split("\t",$_))[0,1,2,3,4,5]; | 
|  | 338 	my @t2=split(/\|/,(split("\t",$_))[10]); | 
|  | 339 	push(@t2,$sample); | 
|  | 340 | 
|  | 341 	print LINKS join("\t",@t1)."\t".join("\t",@t2)."\n"; | 
|  | 342 | 
|  | 343     } | 
|  | 344     close BEDPE; | 
|  | 345     close LINKS; | 
|  | 346 | 
|  | 347 } | 
|  | 348 #------------------------------------------------------------------------------# | 
|  | 349 #------------------------------------------------------------------------------# | 
|  | 350 #convert the links file to the bed format | 
|  | 351 sub links2bedfile{ | 
|  | 352 | 
|  | 353     my ($tag_length,$color_code,$links_file,$bed_file)=@_; | 
|  | 354 | 
|  | 355     print LOG "\# Converting to the bed format...\n"; | 
|  | 356 | 
|  | 357     my $compare=1; | 
|  | 358     if($links_file!~/compared$/){ | 
|  | 359 	$compare=0; | 
|  | 360 	$tag_length->{none}->{1}=$tag_length->{1}; | 
|  | 361 	$tag_length->{none}->{2}=$tag_length->{2}; | 
|  | 362     } | 
|  | 363 | 
|  | 364     #color-code hash table | 
|  | 365     tie (my %hcolor,'Tie::IxHash'); | 
|  | 366     my %color_order; | 
|  | 367     $color_order{"255,255,255"}=0; | 
|  | 368     my $n=1; | 
|  | 369     foreach my $col (keys %{$color_code}){ | 
|  | 370 	my ($min_links,$max_links)=split(",",$color_code->{$col}); | 
|  | 371 	$hcolor{$col}=[$min_links,$max_links]; | 
|  | 372 	$color_order{$col}=$n; | 
|  | 373 	$n++; | 
|  | 374     } | 
|  | 375 | 
|  | 376     my %pair; | 
|  | 377     my %pt; | 
|  | 378     $n=1; | 
|  | 379     open LINKS, "<$links_file" or die "$0: can't open $links_file:$!\n"; | 
|  | 380 | 
|  | 381     my %str=( "F"=>"+", "R"=>"-" ); | 
|  | 382 | 
|  | 383     while(<LINKS>){ | 
|  | 384 | 
|  | 385 	my @t=split; | 
|  | 386 	my $sample=($compare)? pop(@t):"none"; | 
|  | 387 | 
|  | 388 	my $chr1=$t[0]; | 
|  | 389 	my $chr2=$t[3]; | 
|  | 390 	$chr1 = "chr".$chr1 unless ($chr1 =~ m/chr/i); | 
|  | 391 	$chr2 = "chr".$chr2 unless ($chr2 =~ m/chr/i); | 
|  | 392 	my $same_chr=($chr1 eq $chr2)? 1:0; | 
|  | 393 | 
|  | 394 	my $count=$t[6]; | 
|  | 395 	my $color=getColor($count,\%hcolor,"bed"); | 
|  | 396 | 
|  | 397 	my @pairs=deleteBadOrderSensePairs(split(",",$t[7])); | 
|  | 398 	my @strand1=deleteBadOrderSensePairs(split(",",$t[8])); | 
|  | 399 	my @strand2=deleteBadOrderSensePairs(split(",",$t[9])); | 
|  | 400 	my @ends_order1=deleteBadOrderSensePairs(split(",",$t[10])); | 
|  | 401 	my @ends_order2=deleteBadOrderSensePairs(split(",",$t[11])); | 
|  | 402 	my @position1=deleteBadOrderSensePairs(split(",",$t[14])); | 
|  | 403 	my @position2=deleteBadOrderSensePairs(split(",",$t[15])); | 
|  | 404 	my @start1; my @end1; getCoordswithLeftMost(\@start1,\@end1,\@position1,\@strand1,\@ends_order1,$tag_length->{$sample}); | 
|  | 405 	my @start2; my @end2; getCoordswithLeftMost(\@start2,\@end2,\@position2,\@strand2,\@ends_order2,$tag_length->{$sample}); | 
|  | 406 | 
|  | 407 | 
|  | 408 	for my $p (0..$#pairs){ | 
|  | 409 | 
|  | 410 	    if (!exists $pair{$pairs[$p]}){ | 
|  | 411 | 
|  | 412 		if($same_chr){ | 
|  | 413 | 
|  | 414 		    $pair{$pairs[$p]}->{0}=[ $chr1, $start1[$p]-1, $end2[$p], $pairs[$p], 0, $str{$strand1[$p]}, | 
|  | 415 				    $start1[$p]-1, $end2[$p], $color, | 
|  | 416 				    2, $tag_length->{$sample}->{$ends_order1[$p]}.",".$tag_length->{$sample}->{$ends_order2[$p]}, "0,".($start2[$p]-$start1[$p]) ]; | 
|  | 417 		    $pt{$n}=$pair{$pairs[$p]}->{0}; | 
|  | 418 		    $n++; | 
|  | 419 | 
|  | 420 		}else{ | 
|  | 421 | 
|  | 422 		    $pair{$pairs[$p]}->{1}=[ $chr1, $start1[$p]-1, $end1[$p] , $pairs[$p]."/1", 0, $str{$strand1[$p]}, | 
|  | 423 						$start1[$p]-1, $end1[$p], $color, | 
|  | 424 						1, $tag_length->{$sample}->{$ends_order1[$p]}, 0]; | 
|  | 425 		    $pt{$n}=$pair{$pairs[$p]}->{1}; | 
|  | 426 		    $n++; | 
|  | 427 | 
|  | 428 | 
|  | 429 		    $pair{$pairs[$p]}->{2}=[ $chr2, $start2[$p]-1, $end2[$p], $pairs[$p]."/2", 0, $str{$strand2[$p]}, | 
|  | 430 						$start2[$p]-1, $end2[$p], $color, | 
|  | 431 						1, $tag_length->{$sample}->{$ends_order2[$p]}, 0]; | 
|  | 432 		    $pt{$n}=$pair{$pairs[$p]}->{2}; | 
|  | 433 		    $n++; | 
|  | 434 		} | 
|  | 435 	    }else{ | 
|  | 436 | 
|  | 437 		if($same_chr){ | 
|  | 438 		    ${$pair{$pairs[$p]}->{0}}[8]=$color if($color_order{$color}>$color_order{${$pair{$pairs[$p]}->{0}}[8]}); | 
|  | 439 		}else{ | 
|  | 440 		    ${$pair{$pairs[$p]}->{1}}[8]=$color if($color_order{$color}>$color_order{${$pair{$pairs[$p]}->{1}}[8]}); | 
|  | 441 		    ${$pair{$pairs[$p]}->{2}}[8]=$color if($color_order{$color}>$color_order{${$pair{$pairs[$p]}->{2}}[8]}); | 
|  | 442 		} | 
|  | 443 	    } | 
|  | 444 	} | 
|  | 445     } | 
|  | 446     close LINKS; | 
|  | 447 | 
|  | 448     my $nb_pairs=$n-1; | 
|  | 449 | 
|  | 450     open BED, ">$bed_file" or die "$0: can't write in the output: $bed_file :$!\n"; | 
|  | 451     print BED "track name=\"$bed_file\" description=\"mate pairs involved in links\" ". | 
|  | 452 	      "visibility=2 itemRgb=\"On\"\n"; | 
|  | 453 | 
|  | 454     for my $i (1..$nb_pairs){ | 
|  | 455 	print BED join("\t",@{$pt{$i}})."\n"; | 
|  | 456     } | 
|  | 457 | 
|  | 458     close BED; | 
|  | 459 | 
|  | 460     print LOG "-- output created: $bed_file\n"; | 
|  | 461 | 
|  | 462     undef %pair; | 
|  | 463     undef %pt; | 
|  | 464 | 
|  | 465 } | 
|  | 466 #------------------------------------------------------------------------------# | 
|  | 467 #------------------------------------------------------------------------------# | 
|  | 468 sub links2SVfile{ | 
|  | 469 | 
|  | 470     my($links_file,$sv_file)=@_; | 
|  | 471 | 
|  | 472     print LOG "\# Converting to the sv output table...\n"; | 
|  | 473     open LINKS, "<$links_file" or die "$0: can't open $links_file:$!\n"; | 
|  | 474     open SV, ">$sv_file" or die "$0: can't write in the output: $sv_file :$!\n"; | 
|  | 475 | 
|  | 476     my @header=qw(chr_type SV_type BAL_type chromosome1 start1-end1 average_dist | 
|  | 477     chromosome2 start2-end2 nb_pairs score_strand_filtering score_order_filtering score_insert_size_filtering | 
|  | 478     final_score breakpoint1_start1-end1 breakpoint2_start2-end2); | 
|  | 479 | 
|  | 480     my $nb_links=0; | 
|  | 481 | 
|  | 482     while (<LINKS>){ | 
|  | 483 | 
|  | 484 	my @t=split; | 
|  | 485 	my @sv=(); | 
|  | 486 	my $sv_type="-"; | 
|  | 487 	my $strand_ratio="-"; | 
|  | 488 	my $eq_ratio="-"; | 
|  | 489 	my $eq_type="-"; | 
|  | 490 	my $insert_ratio="-"; | 
|  | 491 	my $link="-"; | 
|  | 492 	my ($bk1, $bk2)=("-","-"); | 
|  | 493 	my $score="-"; | 
|  | 494 | 
|  | 495 	my ($chr1,$start1,$end1)=($t[0],$t[1],$t[2]); | 
|  | 496 	my ($chr2,$start2,$end2)=($t[3],$t[4],$t[5]); | 
|  | 497 	my $nb_pairs=$t[6]; | 
|  | 498 	$chr1 = "chr".$chr1 unless ($chr1 =~ m/chr/i); | 
|  | 499 	$chr2 = "chr".$chr2 unless ($chr2 =~ m/chr/i); | 
|  | 500 	my $chr_type=($chr1 eq $chr2)? "INTRA":"INTER"; | 
|  | 501 | 
|  | 502 	#if strand filtering | 
|  | 503 	if (defined $t[16]){ | 
|  | 504 	    #if inter-chr link | 
|  | 505 	    $sv_type=$t[16]; | 
|  | 506 	    if(defined $t[17] && $t[17]=~/^(\d+)\/(\d+)$/){ | 
|  | 507 		$strand_ratio=floor($1/$2*100)."%"; | 
|  | 508 		$score=$t[18]; | 
|  | 509 	    } | 
|  | 510 	    if(defined $t[18] && $t[18]=~/^(\d+)\/(\d+)$/){ | 
|  | 511 	    #if intra-chr link with insert size filtering | 
|  | 512 		$strand_ratio=floor($1/$2*100)."%"; | 
|  | 513 		$link=floor($t[17]); | 
|  | 514 		if($sv_type!~/^INV/){ | 
|  | 515 		    $insert_ratio=floor($1/$2*100)."%" if($t[19]=~/^(\d+)\/(\d+)$/); | 
|  | 516 		    $score=$t[20]; | 
|  | 517 		}else{ | 
|  | 518 		    $score=$t[19]; | 
|  | 519 		} | 
|  | 520 	    } | 
|  | 521 	} | 
|  | 522 | 
|  | 523 	if(defined $t[18] && ($t[18] eq "UNBAL" || $t[18] eq "BAL")){ | 
|  | 524 | 
|  | 525 	    #if strand and order filtering only and/or interchr link | 
|  | 526 	    $eq_type=$t[18]; | 
|  | 527 	    $eq_ratio=floor($1/$2*100)."%" if($t[19]=~/^(\d+)\/(\d+)$/); | 
|  | 528 	    ($bk1, $bk2)=($t[20],$t[21]); | 
|  | 529 	    foreach my $bk ($bk1, $bk2){$bk=~s/\),\(/ /g; $bk=~s/(\(|\))//g; $bk=~s/,/-/g;} | 
|  | 530 	    $score=$t[22]; | 
|  | 531 | 
|  | 532 	}elsif(defined $t[19] && ($t[19] eq "UNBAL" || $t[19] eq "BAL")){ | 
|  | 533 | 
|  | 534 	    #if all three filtering | 
|  | 535 	    $link=floor($t[17]); | 
|  | 536 	    $eq_type=$t[19]; | 
|  | 537 	    $eq_ratio=floor($1/$2*100)."%" if($t[20]=~/^(\d+)\/(\d+)$/); | 
|  | 538 | 
|  | 539 	    if(defined $t[21] && $t[21]=~/^(\d+)\/(\d+)$/){ | 
|  | 540 		$insert_ratio=floor($1/$2*100)."%"; | 
|  | 541 		($bk1, $bk2)=($t[22],$t[23]); | 
|  | 542 		$score=$t[24]; | 
|  | 543 | 
|  | 544 	    }else{ | 
|  | 545 		($bk1, $bk2)=($t[21],$t[22]); | 
|  | 546 		$score=$t[23]; | 
|  | 547 	    } | 
|  | 548 	    foreach my $bk ($bk1, $bk2){$bk=~s/\),\(/ /g; $bk=~s/(\(|\))//g; $bk=~s/,/-/g;} | 
|  | 549 | 
|  | 550 	} | 
|  | 551 | 
|  | 552 | 
|  | 553 	push(@sv, $chr_type, $sv_type,$eq_type); | 
|  | 554 	push(@sv,"$chr1\t$start1-$end1"); | 
|  | 555 	push(@sv, $link); | 
|  | 556 	push(@sv,"$chr2\t$start2-$end2", | 
|  | 557 	     $nb_pairs,$strand_ratio,$eq_ratio,$insert_ratio, decimal($score,4), $bk1, $bk2); | 
|  | 558 | 
|  | 559 | 
|  | 560 	print SV join("\t",@sv)."\n"; | 
|  | 561     } | 
|  | 562 | 
|  | 563     close LINKS; | 
|  | 564     close SV; | 
|  | 565 | 
|  | 566     system "sort  -k 9,9nr -k 13,13nr $sv_file > $sv_file.sorted"; | 
|  | 567 | 
|  | 568     open SV, "<".$sv_file.".sorted" or die "$0: can't open in the output: $sv_file".".sorted :$!\n"; | 
|  | 569     my @links=<SV>; | 
|  | 570     close SV; | 
|  | 571 | 
|  | 572     open SV, ">$sv_file" or die "$0: can't write in the output: $sv_file :$!\n"; | 
|  | 573 | 
|  | 574     print SV join("\t",@header)."\n"; | 
|  | 575     print SV @links; | 
|  | 576     close SV; | 
|  | 577 | 
|  | 578     unlink($sv_file.".sorted"); | 
|  | 579 | 
|  | 580     print LOG "-- output created: $sv_file\n"; | 
|  | 581 | 
|  | 582 } | 
|  | 583 #------------------------------------------------------------------------------# | 
|  | 584 #------------------------------------------------------------------------------# | 
|  | 585 sub deleteBadOrderSensePairs{ | 
|  | 586 | 
|  | 587     my (@tab)=@_; | 
|  | 588     my @tab2; | 
|  | 589 | 
|  | 590     foreach my $v (@tab){ | 
|  | 591 | 
|  | 592 	$v=~s/[\(\)]//g; | 
|  | 593 	push(@tab2,$v) if($v!~/[\$\*\@]$/); | 
|  | 594     } | 
|  | 595     return @tab2; | 
|  | 596 } | 
|  | 597 #------------------------------------------------------------------------------# | 
|  | 598 #------------------------------------------------------------------------------# | 
|  | 599 #gets starts and ends Coords when start=leftmost given positions, directions and orders | 
|  | 600 sub getCoordswithLeftMost { | 
|  | 601 | 
|  | 602     my ($starts,$ends,$positions,$strand,$end_order,$tag_length) = @_; | 
|  | 603 | 
|  | 604     for my $i (0..scalar(@{$positions})-1) { | 
|  | 605 | 
|  | 606 	if($strand->[$i] eq 'F'){ | 
|  | 607 	    $starts->[$i]=$positions->[$i]; | 
|  | 608 	    $ends->[$i]=$positions->[$i]+$tag_length->{$end_order->[$i]}-1; | 
|  | 609 	}else{ | 
|  | 610 	    $starts->[$i]=$positions->[$i]-$tag_length->{$end_order->[$i]}+1; | 
|  | 611 	    $ends->[$i]=$positions->[$i]; | 
|  | 612 	} | 
|  | 613     } | 
|  | 614 } | 
|  | 615 #------------------------------------------------------------------------------# | 
|  | 616 #------------------------------------------------------------------------------# | 
|  | 617 sub floor { | 
|  | 618     my $nb = $_[0]; | 
|  | 619     $nb=~ s/\..*//; | 
|  | 620     return $nb; | 
|  | 621 } | 
|  | 622 #------------------------------------------------------------------------------# | 
|  | 623 #------------------------------------------------------------------------------# | 
|  | 624 sub decimal{ | 
|  | 625 | 
|  | 626   my $num=shift; | 
|  | 627   my $digs_to_cut=shift; | 
|  | 628 | 
|  | 629   $num=sprintf("%.".($digs_to_cut-1)."f", $num) if ($num=~/\d+\.(\d){$digs_to_cut,}/); | 
|  | 630 | 
|  | 631   return $num; | 
|  | 632 } | 
|  | 633 | 
|  | 634 #------------------------------------------------------------------------------# | 
|  | 635 #------------------------------------------------------------------------------# | 
|  | 636 #Sort links according the concerned chromosomes and their coordinates | 
|  | 637 sub sortLinks{ | 
|  | 638 | 
|  | 639     my ($links_file,$sortedlinks_file,$unique)=@_; | 
|  | 640 | 
|  | 641     print LOG "# Sorting links...\n"; | 
|  | 642 | 
|  | 643     my $pipe=($unique)? "| sort -u":""; | 
|  | 644     system "sort -k 1,1 -k 4,4 -k 2,2n -k 5,5n -k 8,8n $links_file $pipe > $sortedlinks_file"; | 
|  | 645 | 
|  | 646 } | 
|  | 647 #------------------------------------------------------------------------------# | 
|  | 648 #------------------------------------------------------------------------------# | 
|  | 649 sub getColor{ | 
|  | 650 | 
|  | 651     my($count,$hcolor,$format)=@_; | 
|  | 652     for my $col ( keys % { $hcolor} ) { | 
|  | 653        return $col if($count>=$hcolor->{$col}->[0] && $count<=$hcolor->{$col}->[1]); | 
|  | 654     } | 
|  | 655     return "white" if($format eq "circos"); | 
|  | 656     return "255,255,255" if($format eq "bed"); | 
|  | 657 } | 
|  | 658 #------------------------------------------------------------------------------# | 
|  | 659 #------------------------------------------------------------------------------# | 
|  | 660 #check if the configuration file is correct | 
|  | 661 sub validateconfiguration{ | 
|  | 662 | 
|  | 663     my %conf=%{$_[0]}; | 
|  | 664     my $list_prgs="@ARGV"; | 
|  | 665 | 
|  | 666     my @circos_params=qw(organism_id colorcode); | 
|  | 667     my @bed_params=qw(colorcode); | 
|  | 668     my @compare_params=qw(list_samples list_read_lengths sample_link_file reference_link_file); | 
|  | 669 | 
|  | 670     unless (defined($conf{general}{output_dir})) { | 
|  | 671 	$conf{general}{output_dir} = "."; | 
|  | 672     } | 
|  | 673     unless (-d $conf{general}{output_dir}){ | 
|  | 674 	mkdir $conf{general}{output_dir} or die; | 
|  | 675     } | 
|  | 676     $conf{general}{output_dir}.="/" if($conf{general}{output_dir}!~/\/$/); | 
|  | 677 | 
|  | 678 | 
|  | 679     if($list_prgs=~/links2compare/){ | 
|  | 680 	foreach my $p (@compare_params) { | 
|  | 681 	    die("Error Config : The compare parameter \"$p\" is not defined\n") if (!defined $conf{compare}{$p}); | 
|  | 682 	} | 
|  | 683 | 
|  | 684 	unless (defined($conf{compare}{same_sv_type})) { | 
|  | 685 	    $conf{compare}{same_sv_type} = 0; | 
|  | 686 	} | 
|  | 687 | 
|  | 688 	unless (defined($conf{compare}{min_overlap})) { | 
|  | 689 	    $conf{compare}{min_overlap} = 1E-9; | 
|  | 690 	} | 
|  | 691 | 
|  | 692 	if($conf{compare}{circos_output}){ | 
|  | 693 	    foreach my $p (@circos_params) { | 
|  | 694 		next if($list_prgs=~/^ratio/ && $p eq "colorcode"); | 
|  | 695 		die("Error Config : The circos parameter \"$p\" is not defined\n") if (!defined $conf{circos}{$p}); | 
|  | 696 	    } | 
|  | 697 	} | 
|  | 698 	if($conf{compare}{bed_output}){ | 
|  | 699 	    foreach my $p (@bed_params) { | 
|  | 700 		die("Error Config : The bed parameter \"$p\" is not defined\n") if (!defined $conf{bed}{$p}); | 
|  | 701 	    } | 
|  | 702 	    die("Error Config : The compare parameter \"list_read_lengths\" is not defined\n") if (!defined $conf{compare}{list_read_lengths}); | 
|  | 703 | 
|  | 704 	    my @samples=split(",",$conf{compare}{list_samples}); | 
|  | 705 	    my @read_lengths=split(",",$conf{compare}{list_read_lengths}); | 
|  | 706 	    for my $i (0..$#samples){ | 
|  | 707 		my @l=split("-",$read_lengths[$i]); | 
|  | 708 		$conf{compare}{read_lengths}{$samples[$i]}={ 1=> $l[0], 2=> $l[1]}; | 
|  | 709 	    } | 
|  | 710 	} | 
|  | 711     } | 
|  | 712 | 
|  | 713 | 
|  | 714 } | 
|  | 715 #------------------------------------------------------------------------------# | 
|  | 716 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::# |