Mercurial > repos > jnavarro > creatematrix
comparison CreateMatrixMultiple.pl @ 2:cabe6c55780a draft
Uploaded
| author | jnavarro |
|---|---|
| date | Thu, 26 Oct 2017 07:37:17 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:9e0251d2ecb5 | 2:cabe6c55780a |
|---|---|
| 1 #!/usr/bin/perl | |
| 2 #V1.0.0 | |
| 3 use strict; | |
| 4 use warnings; | |
| 5 use Getopt::Long; | |
| 6 | |
| 7 | |
| 8 my $input_matrix_files; | |
| 9 GetOptions ( | |
| 10 "input_matrix_files=s" => \$input_matrix_files | |
| 11 ) or die("Error in command line arguments\n"); | |
| 12 | |
| 13 my @files = split(/,/,$input_matrix_files); | |
| 14 my @tbl_hash; | |
| 15 my %global_hash; | |
| 16 my @tbl_genotype_names; | |
| 17 my %chromosome_hash; | |
| 18 my %result_by_chr; | |
| 19 my @tbl_chr_name; | |
| 20 | |
| 21 for (my $i=0;$i<=$#files;$i++){ | |
| 22 my $current_file = $files[$i]; | |
| 23 my $current_genotype = "NA"; | |
| 24 my %current_hash; | |
| 25 | |
| 26 open (CF,$current_file) or die ("Can't open file $current_file\n"); | |
| 27 my $header = <CF>; | |
| 28 if ($header =~ /Chrom\s*Pos\s*Ref\s*(.*?)\s*$/){ | |
| 29 $current_genotype = $1; | |
| 30 } | |
| 31 else { | |
| 32 print STDERR "Unable to recognize header in matrix file\n$header\n"; | |
| 33 exit(0); | |
| 34 } | |
| 35 while (my $line=<CF>){ | |
| 36 if ($line!~/^\s*$/){ | |
| 37 my @fields = split (/\t+/,$line); | |
| 38 my $chr; | |
| 39 my $pos; | |
| 40 my $ref; | |
| 41 my $variant; | |
| 42 | |
| 43 if ($fields[0]=~/^\s*([\w\-]+)\s*$/){ | |
| 44 $chr = $1; | |
| 45 } | |
| 46 else { | |
| 47 print STDERR "Unable to detect chromosome in matrix file\n$line\n"; | |
| 48 exit(0); | |
| 49 } | |
| 50 if ($fields[1]=~/^\s*(\d+)\s*$/){ | |
| 51 $pos = $1; | |
| 52 } | |
| 53 else { | |
| 54 print STDERR "Unable to detect position in matrix file\n$line\n"; | |
| 55 exit(0); | |
| 56 } | |
| 57 | |
| 58 if ($fields[2]=~/^\s*([ATGCNX])\s*$/i){ | |
| 59 $ref = $1; | |
| 60 } | |
| 61 else { | |
| 62 print STDERR "Unable to detect reference base in matrix file\n$line\n"; | |
| 63 exit(0); | |
| 64 } | |
| 65 | |
| 66 if ($fields[3]=~/^\s*([\w\/]+)\s*$/i){ | |
| 67 $variant = $1; | |
| 68 } | |
| 69 else { | |
| 70 print STDERR "Unable to detect variant in matrix file\n$line\n"; | |
| 71 exit(0); | |
| 72 } | |
| 73 $current_hash{"$chr#$pos"} = $variant; | |
| 74 $global_hash{"$chr#$pos"} = $ref; | |
| 75 } | |
| 76 } | |
| 77 close CF; | |
| 78 push(@tbl_genotype_names,$current_genotype); | |
| 79 push(@tbl_hash,\%current_hash); | |
| 80 } | |
| 81 | |
| 82 print "Chrom\tPos\tRef"; | |
| 83 for (my $i=0;$i<=$#tbl_genotype_names;$i++){ | |
| 84 print "\t".$tbl_genotype_names[$i]; | |
| 85 } | |
| 86 print "\n"; | |
| 87 | |
| 88 my @tbl_line_to_display; | |
| 89 | |
| 90 #exit(0); | |
| 91 | |
| 92 foreach my $key (keys %global_hash){ | |
| 93 my @tbl = split (/\#/,$key); | |
| 94 my $chr = $tbl[0]; | |
| 95 my $pos = $tbl[1]; | |
| 96 my $ref = $global_hash{$key}; | |
| 97 my $line = "$chr\t$pos\t$ref"; | |
| 98 my $isvariant = 0; | |
| 99 for (my $i=0;$i<=$#tbl_hash;$i++){ | |
| 100 #my %current_hash = %{$tbl_hash[$i]}; | |
| 101 if ($tbl_hash[$i]->{$key}){ | |
| 102 $line.="\t".$tbl_hash[$i]->{$key}; | |
| 103 if ($tbl_hash[$i]->{$key} ne $ref){ | |
| 104 $isvariant = 1; | |
| 105 } | |
| 106 } | |
| 107 else { | |
| 108 $line.="\t"."NA"; | |
| 109 } | |
| 110 | |
| 111 } | |
| 112 | |
| 113 $line .="\n"; | |
| 114 if ($isvariant == 1){ | |
| 115 push(@tbl_line_to_display,$line); | |
| 116 } | |
| 117 } | |
| 118 | |
| 119 | |
| 120 for (my $i=0;$i<=$#tbl_line_to_display;$i++){ | |
| 121 my @tbl = split (/\s+/,$tbl_line_to_display[$i]); | |
| 122 my $current_chr = $tbl[0]; | |
| 123 my @current_tbl; | |
| 124 if ($result_by_chr{$current_chr}){ | |
| 125 push (@{$result_by_chr{$current_chr}},$tbl_line_to_display[$i]); | |
| 126 } | |
| 127 else { | |
| 128 push (@current_tbl,$tbl_line_to_display[$i]); | |
| 129 $result_by_chr{$current_chr} = \@current_tbl; | |
| 130 } | |
| 131 } | |
| 132 | |
| 133 foreach my $key (sort keys %result_by_chr){ | |
| 134 my @current_tbl = sort mysort @{$result_by_chr{$key}}; | |
| 135 for (my $i=0;$i<=$#current_tbl;$i++){ | |
| 136 print $current_tbl[$i]; | |
| 137 } | |
| 138 } | |
| 139 | |
| 140 sub mysort { | |
| 141 my @tbla = split (/\s+/,$a); | |
| 142 my @tblb = split (/\s+/,$b); | |
| 143 $tbla[0] cmp $tblb[0] || $tbla[1]<=>$tblb[1]; | |
| 144 } | |
| 145 | |
| 146 |
