Next changeset 1:9e0251d2ecb5 (2017-10-26) |
Commit message:
Uploaded |
added:
CreateMatrix.pl |
b |
diff -r 000000000000 -r e94977a3dcad CreateMatrix.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CreateMatrix.pl Thu Oct 26 06:41:56 2017 -0400 |
[ |
@@ -0,0 +1,174 @@ +#!/usr/bin/perl +#V1.0.0 +use strict; +use warnings; +use Getopt::Long; + +my $input_pileup_file; +my $input_mpileupFiltred_file; +my $input_name; + +GetOptions ( +"input_name=s" => \$input_name, +"input_pileup_file=s" => \$input_pileup_file, +"input_mpileupFiltred_file=s" => \$input_mpileupFiltred_file +) or die("Error in command line arguments\n"); + + +print "Chrom\tPos\tRef\t$input_name\n"; + +my %mpileupFiltred; +open (VU,$input_mpileupFiltred_file) or die ("Can't open mpileupFiltred file\n"); +while (my $line=<VU>){ + if ($line!~/^\s*$/){ + my @fields = split (/\t+/,$line); + my $chr; + my $pos; + if ($fields[0]=~/^\s*([\w\-]+)\s*$/){ + $chr = $1; + } + else { + print STDERR "Unable to detect chromosome in pileup file\n$line",$fields[0],"\n",$fields[1],"\n"; + exit(0); + } + if ($fields[1]=~/^\s*(\d+)\s*$/){ + $pos = $1; + } + else { + print STDERR "Unable to detect position in pileup file\n$line",$fields[0],"\n",$fields[1],"\n"; + exit(0); + } + if ($#fields>=4){ + $mpileupFiltred{"$chr#$pos"}=$fields[4]; + } + else { + print STDERR "Unrecognized pileup format, need at least 5 columns\n$line\n"; + } + } +} +close VU; + +my %covered; + +my $compt=0; +open (PU,$input_pileup_file) or die ("Can't open pileup file\n"); +while (my $line=<PU>){ + #$compt++; + #if ($compt>200000){last;} + if ($line!~/^\s*$/){ + my @fields = split (/\t+/,$line); + my $chr; + my $pos; + my $ref; + my $depth; + my $pile; + + if ($#fields>=4){ + if ($fields[0]=~/^\s*([\w\-]+)\s*$/){ + $chr = $1; + } + else { + print STDERR "Unable to detect chromosome in pileup file\n$line\n"; + exit(0); + } + if ($fields[1]=~/^\s*(\d+)\s*$/){ + $pos = $1; + } + else { + print STDERR "Unable to detect position in pileup file\n$line\n"; + exit(0); + } + + if ($fields[2]=~/^\s*([ATGCNX])\s*$/i){ + $ref = $1; + } + else { + print STDERR "Unable to detect reference base in pileup file\n$line\n"; + exit(0); + } + + if ($fields[3]=~/^\s*(\d+)\s*$/i){ + $depth = $1; + } + else { + print STDERR "Unable to detect depth in pileup file\n$line\n"; + exit(0); + } + + $pile = $fields[4]; + $pile =~ s/\$//g; #the read start at this position + $pile =~ s/\^.//g; #the read end at this position followed by quality char + + if ($mpileupFiltred{"$chr#$pos"}) { + $pile = $mpileupFiltred{"$chr#$pos"}; + $pile =~ s/\$//g; #the read start at this position + $pile =~ s/\^.//g; #the read end at this position followed by quality char + my $ori_pile = $pile; + + my @detail = split(//,$pile); + my $current_in=""; + my $current_del=""; + my $current_length=0; + my $noindel_pile=""; + my @IN; + my @DEL; + + $noindel_pile = $pile; + while ($noindel_pile =~ /\+(\d+)/){ + my $length = $1; + $noindel_pile =~ s/(\+\d+[ATGCNX]{$length})//i ; + push(@IN,$1); + #print "test : $pile / $noindel_pile\n"; + } + while ($noindel_pile =~ /\-(\d+)/){ + my $length = $1; + $noindel_pile =~ s/(\-\d+[ATGCNX]{$length})//i ; + push(@DEL,$1); + #print "test : $pile / $noindel_pile\n"; + } + + my %variant_type; + + my $indel_pile = $ori_pile; + $variant_type{"IN"} = ($indel_pile =~ s/\+//gi); + $variant_type{"DEL"} = ($indel_pile =~ s/\-//gi); + + my $base_pile = $noindel_pile; + $variant_type{"A"} = ($base_pile =~ s/a//gi); + $variant_type{"T"} = ($base_pile =~ s/t//gi); + $variant_type{"C"} = ($base_pile =~ s/c//gi); + $variant_type{"G"} = ($base_pile =~ s/g//gi); + $variant_type{"N"} = ($base_pile =~ s/n//gi); + + my $top_number=0; + my $top_variant=""; + foreach my $key (sort{$variant_type{$b}<=>$variant_type{$a}} keys %variant_type){ + if ($variant_type{$key} > 0){ + if ($variant_type{$key} > $top_number){ + $top_number = $variant_type{$key}; + $top_variant = $key; + } + elsif ($variant_type{$key} == $top_number){ + $top_variant .= "/$key"; + } + } + + } + print "$chr\t$pos\t$ref\t$top_variant\n"; + + } + elsif (($pile=~/[\+\-]/)||($pile=~/[ATGCN]/i)){ + print "$chr\t$pos\t$ref\tX\n"; + } + else { + print "$chr\t$pos\t$ref\t$ref\n"; + } + } + else { + print STDERR "Unrecognized pileup format, need at least 5 columns\n$line\n"; + } + } +} +close PU; + + |