annotate rapsodyn/CreateMatrixMultiple.pl @ 15:56d328bce3a7 draft default tip

Uploaded
author mcharles
date Thu, 29 Jan 2015 08:54:06 -0500
parents 0a6c1cfe4dc8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10
0a6c1cfe4dc8 Uploaded
mcharles
parents:
diff changeset
1 #!/usr/bin/perl
0a6c1cfe4dc8 Uploaded
mcharles
parents:
diff changeset
2 #V1.0.0
0a6c1cfe4dc8 Uploaded
mcharles
parents:
diff changeset
3 use strict;
0a6c1cfe4dc8 Uploaded
mcharles
parents:
diff changeset
4 use warnings;
0a6c1cfe4dc8 Uploaded
mcharles
parents:
diff changeset
5 use Getopt::Long;
0a6c1cfe4dc8 Uploaded
mcharles
parents:
diff changeset
6
0a6c1cfe4dc8 Uploaded
mcharles
parents:
diff changeset
7
0a6c1cfe4dc8 Uploaded
mcharles
parents:
diff changeset
8 my $input_matrix_files;
0a6c1cfe4dc8 Uploaded
mcharles
parents:
diff changeset
9 GetOptions (
0a6c1cfe4dc8 Uploaded
mcharles
parents:
diff changeset
10 "input_matrix_files=s" => \$input_matrix_files
0a6c1cfe4dc8 Uploaded
mcharles
parents:
diff changeset
11 ) or die("Error in command line arguments\n");
0a6c1cfe4dc8 Uploaded
mcharles
parents:
diff changeset
12
0a6c1cfe4dc8 Uploaded
mcharles
parents:
diff changeset
13 my @files = split(/,/,$input_matrix_files);
15
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
14 my @tbl_hash;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
15 my %global_hash;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
16 my @tbl_genotype_names;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
17 my %chromosome_hash;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
18 my %result_by_chr;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
19 my @tbl_chr_name;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
20
10
0a6c1cfe4dc8 Uploaded
mcharles
parents:
diff changeset
21 for (my $i=0;$i<=$#files;$i++){
15
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
22 my $current_file = $files[$i];
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
23 my $current_genotype = "NA";
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
24 my %current_hash;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
25
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
26 open (CF,$current_file) or die ("Can't open file $current_file\n");
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
27 my $header = <CF>;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
28 if ($header =~ /Chrom\s*Pos\s*Ref\s*(.*?)\s*$/){
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
29 $current_genotype = $1;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
30 }
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
31 else {
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
32 print STDERR "Unable to recognize header in matrix file\n$header\n";
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
33 exit(0);
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
34 }
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
35 while (my $line=<CF>){
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
36 if ($line!~/^\s*$/){
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
37 my @fields = split (/\t+/,$line);
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
38 my $chr;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
39 my $pos;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
40 my $ref;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
41 my $variant;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
42
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
43 if ($fields[0]=~/^\s*([\w\-]+)\s*$/){
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
44 $chr = $1;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
45 }
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
46 else {
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
47 print STDERR "Unable to detect chromosome in matrix file\n$line\n";
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
48 exit(0);
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
49 }
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
50 if ($fields[1]=~/^\s*(\d+)\s*$/){
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
51 $pos = $1;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
52 }
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
53 else {
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
54 print STDERR "Unable to detect position in matrix file\n$line\n";
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
55 exit(0);
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
56 }
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
57
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
58 if ($fields[2]=~/^\s*([ATGCNX])\s*$/i){
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
59 $ref = $1;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
60 }
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
61 else {
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
62 print STDERR "Unable to detect reference base in matrix file\n$line\n";
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
63 exit(0);
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
64 }
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
65
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
66 if ($fields[3]=~/^\s*([\w\/]+)\s*$/i){
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
67 $variant = $1;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
68 }
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
69 else {
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
70 print STDERR "Unable to detect variant in matrix file\n$line\n";
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
71 exit(0);
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
72 }
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
73 $current_hash{"$chr#$pos"} = $variant;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
74 $global_hash{"$chr#$pos"} = $ref;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
75 }
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
76 }
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
77 close CF;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
78 push(@tbl_genotype_names,$current_genotype);
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
79 push(@tbl_hash,\%current_hash);
10
0a6c1cfe4dc8 Uploaded
mcharles
parents:
diff changeset
80 }
0a6c1cfe4dc8 Uploaded
mcharles
parents:
diff changeset
81
15
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
82 print "Chrom\tPos\tRef";
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
83 for (my $i=0;$i<=$#tbl_genotype_names;$i++){
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
84 print "\t".$tbl_genotype_names[$i];
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
85 }
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
86 print "\n";
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
87
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
88 my @tbl_line_to_display;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
89
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
90 #exit(0);
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
91
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
92 foreach my $key (keys %global_hash){
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
93 my @tbl = split (/\#/,$key);
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
94 my $chr = $tbl[0];
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
95 my $pos = $tbl[1];
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
96 my $ref = $global_hash{$key};
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
97 my $line = "$chr\t$pos\t$ref";
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
98 my $isvariant = 0;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
99 for (my $i=0;$i<=$#tbl_hash;$i++){
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
100 #my %current_hash = %{$tbl_hash[$i]};
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
101 if ($tbl_hash[$i]->{$key}){
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
102 $line.="\t".$tbl_hash[$i]->{$key};
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
103 if ($tbl_hash[$i]->{$key} ne $ref){
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
104 $isvariant = 1;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
105 }
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
106 }
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
107 else {
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
108 $line.="\t"."NA";
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
109 }
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
110
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
111 }
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
112
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
113 $line .="\n";
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
114 if ($isvariant == 1){
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
115 push(@tbl_line_to_display,$line);
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
116 }
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
117 }
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
118
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
119
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
120 for (my $i=0;$i<=$#tbl_line_to_display;$i++){
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
121 my @tbl = split (/\s+/,$tbl_line_to_display[$i]);
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
122 my $current_chr = $tbl[0];
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
123 my @current_tbl;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
124 if ($result_by_chr{$current_chr}){
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
125 push (@{$result_by_chr{$current_chr}},$tbl_line_to_display[$i]);
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
126 }
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
127 else {
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
128 push (@current_tbl,$tbl_line_to_display[$i]);
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
129 $result_by_chr{$current_chr} = \@current_tbl;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
130 }
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
131 }
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
132
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
133 foreach my $key (sort keys %result_by_chr){
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
134 my @current_tbl = sort mysort @{$result_by_chr{$key}};
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
135 for (my $i=0;$i<=$#current_tbl;$i++){
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
136 print $current_tbl[$i];
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
137 }
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
138 }
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
139
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
140 sub mysort {
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
141 my @tbla = split (/\s+/,$a);
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
142 my @tblb = split (/\s+/,$b);
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
143 $tbla[0] cmp $tblb[0] || $tbla[1]<=>$tblb[1];
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
144 }
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
145
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
146