comparison rapsodyn/CreateMatrixMultiple.pl @ 15:56d328bce3a7 draft default tip

Uploaded
author mcharles
date Thu, 29 Jan 2015 08:54:06 -0500
parents 0a6c1cfe4dc8
children
comparison
equal deleted inserted replaced
14:93e6f2af1ce2 15:56d328bce3a7
9 GetOptions ( 9 GetOptions (
10 "input_matrix_files=s" => \$input_matrix_files 10 "input_matrix_files=s" => \$input_matrix_files
11 ) or die("Error in command line arguments\n"); 11 ) or die("Error in command line arguments\n");
12 12
13 my @files = split(/,/,$input_matrix_files); 13 my @files = split(/,/,$input_matrix_files);
14 my @tbl_hash;
15 my %global_hash;
16 my @tbl_genotype_names;
17 my %chromosome_hash;
18 my %result_by_chr;
19 my @tbl_chr_name;
20
14 for (my $i=0;$i<=$#files;$i++){ 21 for (my $i=0;$i<=$#files;$i++){
15 print $files[$i],"\n"; 22 my $current_file = $files[$i];
23 my $current_genotype = "NA";
24 my %current_hash;
25
26 open (CF,$current_file) or die ("Can't open file $current_file\n");
27 my $header = <CF>;
28 if ($header =~ /Chrom\s*Pos\s*Ref\s*(.*?)\s*$/){
29 $current_genotype = $1;
30 }
31 else {
32 print STDERR "Unable to recognize header in matrix file\n$header\n";
33 exit(0);
34 }
35 while (my $line=<CF>){
36 if ($line!~/^\s*$/){
37 my @fields = split (/\t+/,$line);
38 my $chr;
39 my $pos;
40 my $ref;
41 my $variant;
42
43 if ($fields[0]=~/^\s*([\w\-]+)\s*$/){
44 $chr = $1;
45 }
46 else {
47 print STDERR "Unable to detect chromosome in matrix file\n$line\n";
48 exit(0);
49 }
50 if ($fields[1]=~/^\s*(\d+)\s*$/){
51 $pos = $1;
52 }
53 else {
54 print STDERR "Unable to detect position in matrix file\n$line\n";
55 exit(0);
56 }
57
58 if ($fields[2]=~/^\s*([ATGCNX])\s*$/i){
59 $ref = $1;
60 }
61 else {
62 print STDERR "Unable to detect reference base in matrix file\n$line\n";
63 exit(0);
64 }
65
66 if ($fields[3]=~/^\s*([\w\/]+)\s*$/i){
67 $variant = $1;
68 }
69 else {
70 print STDERR "Unable to detect variant in matrix file\n$line\n";
71 exit(0);
72 }
73 $current_hash{"$chr#$pos"} = $variant;
74 $global_hash{"$chr#$pos"} = $ref;
75 }
76 }
77 close CF;
78 push(@tbl_genotype_names,$current_genotype);
79 push(@tbl_hash,\%current_hash);
16 } 80 }
17 81
82 print "Chrom\tPos\tRef";
83 for (my $i=0;$i<=$#tbl_genotype_names;$i++){
84 print "\t".$tbl_genotype_names[$i];
85 }
86 print "\n";
87
88 my @tbl_line_to_display;
89
90 #exit(0);
91
92 foreach my $key (keys %global_hash){
93 my @tbl = split (/\#/,$key);
94 my $chr = $tbl[0];
95 my $pos = $tbl[1];
96 my $ref = $global_hash{$key};
97 my $line = "$chr\t$pos\t$ref";
98 my $isvariant = 0;
99 for (my $i=0;$i<=$#tbl_hash;$i++){
100 #my %current_hash = %{$tbl_hash[$i]};
101 if ($tbl_hash[$i]->{$key}){
102 $line.="\t".$tbl_hash[$i]->{$key};
103 if ($tbl_hash[$i]->{$key} ne $ref){
104 $isvariant = 1;
105 }
106 }
107 else {
108 $line.="\t"."NA";
109 }
110
111 }
112
113 $line .="\n";
114 if ($isvariant == 1){
115 push(@tbl_line_to_display,$line);
116 }
117 }
118
119
120 for (my $i=0;$i<=$#tbl_line_to_display;$i++){
121 my @tbl = split (/\s+/,$tbl_line_to_display[$i]);
122 my $current_chr = $tbl[0];
123 my @current_tbl;
124 if ($result_by_chr{$current_chr}){
125 push (@{$result_by_chr{$current_chr}},$tbl_line_to_display[$i]);
126 }
127 else {
128 push (@current_tbl,$tbl_line_to_display[$i]);
129 $result_by_chr{$current_chr} = \@current_tbl;
130 }
131 }
132
133 foreach my $key (sort keys %result_by_chr){
134 my @current_tbl = sort mysort @{$result_by_chr{$key}};
135 for (my $i=0;$i<=$#current_tbl;$i++){
136 print $current_tbl[$i];
137 }
138 }
139
140 sub mysort {
141 my @tbla = split (/\s+/,$a);
142 my @tblb = split (/\s+/,$b);
143 $tbla[0] cmp $tblb[0] || $tbla[1]<=>$tblb[1];
144 }
145
146