annotate genephys/GenePhys.pl @ 4:3d79224aa2dc draft

Uploaded
author mcharles
date Thu, 30 Oct 2014 06:06:34 -0400
parents 8dfa09868059
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
1 #!/usr/bin/perl
4
3d79224aa2dc Uploaded
mcharles
parents: 3
diff changeset
2 #V1.1.1 minor bug correction (see in code)
3
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
3 #V1.1.0 integrated gene extraction
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
4 #V1.0.2 integrated segment fasta extraction
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
5 #V1.0.1 added log and option
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
6 #V1.0.0
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
7 use strict;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
8 use warnings;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
9 use Getopt::Long;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
10
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
11 my $input_blast_files;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
12 my $input_genes_position_file;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
13 my $input_assembly_file;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
14 my $input_markers_position_file;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
15 my $input_markers_file;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
16 my $log_file;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
17 my $output_fasta_file;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
18 my $output_segment_file;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
19 my $output_genes_list_file;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
20 my $EXTRACT_SEQ = "NO";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
21 my $WINDOW = 200000;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
22 my $OFFSET = 100000;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
23 my $MAX_BLAST_LINES = 1;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
24
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
25 GetOptions (
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
26 "input_assembly_file=s" => \$input_assembly_file,
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
27 "input_markers_position_file=s" => \$input_markers_position_file,
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
28 "input_markers_file=s" => \$input_markers_file,
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
29 "log_file=s" => \$log_file,
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
30 "output_fasta_file=s" => \$output_fasta_file,
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
31 "output_segment_file=s" => \$output_segment_file,
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
32 "extractseq=s" => \$EXTRACT_SEQ,
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
33 "window=i" => \$WINDOW,
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
34 "offset=i" =>\$OFFSET,
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
35 "input_blast_files=s" => \$input_blast_files,
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
36 "input_genes_position_file=s"=> \$input_genes_position_file,
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
37 "output_genes_list_file=s"=>\$output_genes_list_file,
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
38 "max_blast_lines=i" => \$MAX_BLAST_LINES
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
39 ) or die("Error in command line arguments\n");
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
40
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
41 open(LF, ">$log_file") or die("Can't open $log_file\n");
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
42 #print LF $EXTRACT_SEQ."\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
43
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
44 my $current_annotation="";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
45 my @list_marquer;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
46 my %chr;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
47 my %position;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
48
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
49 open(MP, $input_markers_position_file) or die("Can't open $input_markers_position_file\n");
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
50
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
51 my $compt=0;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
52 while (my $line=<MP>){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
53 $compt++;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
54 my @cols = split(/\t/,$line);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
55 if ($#cols != 3){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
56 print STDERR "Error in marker position file format\n$compt : $line\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
57 exit(0);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
58 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
59 my %current;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
60 # Number#Map#Name#Chr#Position#GeneAT#FunctionAT
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
61 my $Name = $cols[0];
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
62 my $Locus = $cols[1];
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
63 my $Chr = $cols[2];
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
64 my $Position = $cols[3];
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
65
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
66
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
67 $chr{$Name} = $Chr;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
68 $position{$Name} = $Position;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
69
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
70 ### Modification 0.9.9
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
71 if ($Locus ne $Name){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
72 $chr{$Locus} = $Chr;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
73 $position{$Locus} = $Position;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
74 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
75 ###
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
76
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
77 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
78 close (MP);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
79
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
80 open(MA, $input_markers_file) or die("Can't open $input_markers_file\n");
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
81 while (my $line=<MA>){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
82 my @cols = split (/\s+/,$line);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
83 for (my $i=0;$i<=$#cols;$i++){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
84 my $current = $cols[$i];
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
85 chomp($current);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
86 if ($current !~ /^\s+$/){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
87 push(@list_marquer,$current);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
88 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
89 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
90 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
91 close (MA);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
92
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
93 my %coord_by_chr;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
94 for (my $i=0;$i<=$#list_marquer;$i++){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
95 my $current_name = $list_marquer[$i];
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
96 my $current_chr = $chr{$current_name};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
97 my $current_position = $position{$current_name};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
98
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
99 if ($current_position =~ /^\d+$/){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
100 my @tbl_coord_for_current_chr;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
101 if ($coord_by_chr{$current_chr}){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
102 @tbl_coord_for_current_chr = @{$coord_by_chr{$current_chr}};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
103 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
104 push(@tbl_coord_for_current_chr,$current_position);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
105 $coord_by_chr{$current_chr}=\@tbl_coord_for_current_chr;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
106 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
107 elsif (($current_position =~/\s*-\s*/)||($current_position =~/none/i)){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
108
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
109 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
110 else {
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
111 chomp($current_position);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
112 print STDERR "Error Parsing $current_name\tposition not recognized : $current_position \n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
113 print $list_marquer[$i],"\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
114 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
115 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
116
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
117 open(OS, ">$output_segment_file") or die ("Can't open $output_segment_file\n");
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
118
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
119 my @segment_chr;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
120 my @segment_start;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
121 my @segment_end;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
122
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
123 foreach my $key (sort keys %coord_by_chr){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
124 my @tbl_coord = @{$coord_by_chr{$key}};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
125 @tbl_coord = sort { $a <=> $b } @tbl_coord;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
126 my $current_start;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
127 my $current_stop;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
128 my $current_start_with_offset;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
129 my $current_stop_with_offset;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
130
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
131 for (my $i=0;$i<=$#tbl_coord;$i++){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
132 if (!$current_start){$current_start=$tbl_coord[$i];$current_stop=$tbl_coord[$i]}
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
133
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
134 # print "$i : $current_start / $current_stop\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
135 if ($tbl_coord[$i]>$current_stop+$WINDOW){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
136 #OFFSET
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
137 if ($current_start>$OFFSET){$current_start_with_offset=$current_start-$OFFSET;}else{$current_start_with_offset=1;}
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
138 $current_stop_with_offset = $current_stop + $OFFSET;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
139 #######
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
140 print OS $key,":",$current_start_with_offset,"..",$current_stop_with_offset,"\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
141 push(@segment_chr,$key);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
142 push(@segment_start,$current_start_with_offset);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
143 push(@segment_end,$current_stop_with_offset);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
144
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
145 $current_start = $tbl_coord[$i];
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
146 $current_stop = $tbl_coord[$i];
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
147
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
148 if ($i==$#tbl_coord){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
149 #OFFSET
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
150 if ($current_start>$OFFSET){$current_start_with_offset=$current_start-$OFFSET;}else{$current_start_with_offset=1;}
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
151 $current_stop_with_offset = $current_stop + $OFFSET;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
152 #######
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
153 print OS $key,":",$current_start_with_offset,"..",$current_stop_with_offset,"\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
154 push(@segment_chr,$key);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
155 push(@segment_start,$current_start_with_offset);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
156 push(@segment_end,$current_stop_with_offset);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
157 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
158 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
159 else {
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
160 $current_stop=$tbl_coord[$i];
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
161 if ($i==$#tbl_coord){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
162 #OFFSET
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
163 if ($current_start>$OFFSET){$current_start_with_offset=$current_start-$OFFSET;}else{$current_start_with_offset=1;}
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
164 $current_stop_with_offset = $current_stop + $OFFSET;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
165 #######
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
166 print OS $key,":",$current_start_with_offset,"..",$current_stop_with_offset,"\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
167 push(@segment_chr,$key);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
168 push(@segment_start,$current_start_with_offset);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
169 push(@segment_end,$current_stop_with_offset);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
170 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
171 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
172 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
173 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
174 close(OS);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
175
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
176 ### Sequence extraction
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
177 if ($EXTRACT_SEQ eq "YES"){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
178
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
179 my %genome;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
180 my $current_header;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
181 my $current_seq="";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
182 open(AF, $input_assembly_file) or die ("Can't open $input_assembly_file\n");
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
183
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
184 while (my $ligne = <AF>){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
185 if ($ligne =~ /^\>(.*?)\s*$/){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
186 if ($current_header){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
187 $genome{$current_header} = $current_seq;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
188 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
189 $current_header=$1;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
190 $current_seq = "";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
191 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
192 else {
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
193 if ($ligne=~/^([ATGCNXatgcnx]+)\s*$/){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
194 $current_seq .= $1;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
195 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
196 else {
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
197 print STDERR "Erreur Parsing n°1\n$ligne\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
198 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
199 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
200 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
201
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
202 #TRAITEMENT DU DERNIER
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
203 if ($current_header){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
204 $genome{$current_header} = $current_seq;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
205 undef($current_seq);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
206 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
207 close (AF);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
208
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
209 open(OF, ">$output_fasta_file") or die ("Can't open $output_fasta_file\n");
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
210 for (my $i=0;$i<=$#segment_chr;$i++){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
211 my $compt=0;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
212 my $current_seq="";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
213 print OF ">",$segment_chr[$i],":",$segment_start[$i],"..",$segment_end[$i]."\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
214 ### Modification 0.9.9
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
215 if ($segment_end[$i]>length($genome{$segment_chr[$i]})){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
216 $segment_end[$i] = length($genome{$segment_chr[$i]});
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
217 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
218 ###
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
219
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
220 my @SEQ = split(//,$genome{$segment_chr[$i]});
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
221 for (my $coord = $segment_start[$i]-1; $coord<=$segment_end[$i]-1;$coord++){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
222 $compt++;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
223 if ($compt > 60 ){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
224 $current_seq .= "\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
225 $compt=1;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
226 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
227 $current_seq .= $SEQ[$coord];
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
228
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
229 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
230 print OF "$current_seq\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
231 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
232 close (OF);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
233 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
234
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
235 ### GENE and BLAST Extraction
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
236 my @blast_by_base;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
237 my @header;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
238
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
239
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
240 my @blastfiles = split(/\,/,$input_blast_files);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
241 for (my $i=0;$i<=$#blastfiles;$i++){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
242 my $current_blast_file = $blastfiles[$i];
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
243 my $current_blast_header = "DEFAULT";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
244 my %current_blast;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
245 open (B,"$current_blast_file") or die ("Can't open $current_blast_file\n");
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
246 while (my $line =<B>){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
247 if ($line =~ /^\#\#(.*?)$/){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
248 $current_blast_header = $1;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
249 print LF $current_blast_header."\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
250 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
251 elsif ($line =~ /^\#/){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
252 # blast file column legend
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
253 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
254 else {
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
255 my @fields = split(/\s+/,$line);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
256 my $gene_id = $fields[0];
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
257 my @blast_for_this_gene;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
258 if ($current_blast{$gene_id}){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
259 @blast_for_this_gene = @{$current_blast{$gene_id}};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
260 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
261
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
262 if ($#blast_for_this_gene<$MAX_BLAST_LINES-1){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
263 push(@blast_for_this_gene,$line);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
264 print LF $gene_id,"\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
265 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
266 $current_blast{$gene_id}=\@blast_for_this_gene;
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
267 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
268 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
269 close(B);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
270 push (@blast_by_base,\%current_blast);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
271 push (@header,$current_blast_header);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
272 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
273
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
274
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
275 open (OGL,">$output_genes_list_file") or die ("Can't open $output_genes_list_file\n");
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
276
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
277 for (my $i=0;$i<=$#segment_chr;$i++){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
278 my $segment_chr = $segment_chr[$i];
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
279 my $segment_start = $segment_start[$i];
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
280 my $segment_end = $segment_end[$i];
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
281
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
282 print OGL "#",$segment_chr[$i],":",$segment_start[$i],"..",$segment_end[$i],"\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
283
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
284 open(IG, $input_genes_position_file) or die("Can't open $input_genes_position_file\n");
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
285 while (my $gene_desc=<IG>){
4
3d79224aa2dc Uploaded
mcharles
parents: 3
diff changeset
286 my @gene_desc = split(/\t+/,$gene_desc);#Modification in 1.1.1 : \s => \t
3
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
287 if ($#gene_desc != 4){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
288 print STDERR "Error in gene position file format\n$gene_desc\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
289 exit(0);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
290 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
291 my $gene_id = $gene_desc[0];
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
292 my $cds_id = $gene_desc[1];
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
293 my $gene_chr = $gene_desc[2];
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
294 my $gene_start = $gene_desc[3];
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
295 my $gene_end = $gene_desc[4];
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
296 if ($segment_chr eq $gene_chr){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
297 if ((($gene_start>=$segment_start)&&($gene_start<=$segment_end))||(($gene_end>=$segment_start)&&($gene_end<=$segment_end))){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
298 print OGL $gene_id," / ",$cds_id,"\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
299
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
300 for (my $i=0;$i<=$#blast_by_base;$i++){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
301 #print LF $header[$i]."\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
302 my %current_blast = %{$blast_by_base[$i]};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
303 if ($current_blast{$cds_id}){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
304 my @blast_by_gene = @{$current_blast{$cds_id}};
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
305 #print LF $#blast_by_gene."\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
306 for (my $j=0;$j<=$#blast_by_gene;$j++){
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
307 my @fields = split(/\t/,$blast_by_gene[$j]);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
308 print OGL $header[$i],"\t";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
309 print OGL $fields[1],"\t";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
310 print OGL $fields[3],"\t";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
311 print OGL $fields[4],"\t";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
312 print OGL $fields[5],"\t";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
313 print OGL $fields[10],"\t";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
314 print OGL $fields[6],"..",$fields[7],"(",$fields[11],")","\t";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
315 print OGL $fields[8],"..",$fields[9],"(",$fields[12],")","\t";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
316 print OGL $fields[13];
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
317 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
318 print OGL "\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
319 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
320 else {
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
321 print OGL $header[$i],"\t","No BLAST results\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
322 print LF $gene_id," / ",$cds_id,"\n";
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
323 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
324 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
325 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
326 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
327
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
328 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
329 close(IG);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
330 }
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
331
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
332
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
333 close (OGL);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
334
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
335 close (LF);
8dfa09868059 Uploaded
mcharles
parents:
diff changeset
336