Mercurial > repos > big-tiandm > mirplant2
comparison Annotate.pl @ 50:7b5a48b972e9 draft
Uploaded
author | big-tiandm |
---|---|
date | Fri, 05 Dec 2014 00:11:02 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
49:f008ab2cadc6 | 50:7b5a48b972e9 |
---|---|
1 #!/usr/bin/perl -w | |
2 #Filename: | |
3 #Author: Chentt | |
4 #Email: chentt@big.ac.cn | |
5 #Date: 2014/4/10 | |
6 #Modified: | |
7 #Description: cluster annotate by priority | |
8 my $version=1.00; | |
9 | |
10 use strict; | |
11 use Getopt::Long; | |
12 | |
13 my %opts; | |
14 GetOptions(\%opts,"i=s","d=i","g=s","o=s","t=s","h"); | |
15 if (!(defined $opts{i} and defined $opts{g} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments | |
16 &usage; | |
17 } | |
18 #my $genelistout=$opts{'t'}; | |
19 my $dis=defined $opts{'d'}? $opts{'d'}:1000; | |
20 my %gene; | |
21 | |
22 #open OUT,">$genelistout"; #output file | |
23 #print OUT "#ID\tchr\tstart\tend\tstrand\ns"; | |
24 open IN,"<$opts{g}"; | |
25 while (my $aline=<IN>) { | |
26 chomp $aline; | |
27 next if($aline=~/^\#/); | |
28 my @tmp=split/\t/,$aline;#ID chr start end strand | |
29 #push @{$gene1{$tmp[0]}},[$tmp[2],$tmp[3],$tmp[1]]; | |
30 $gene{$tmp[1]}{$tmp[0]}=[$tmp[2],$tmp[3],$tmp[4]]; | |
31 } | |
32 #while (my $aline=<IN>) { | |
33 # chomp $aline; | |
34 # next if($aline=~/^\#/); | |
35 # my @tmp=split/\t/,$aline; | |
36 # my $ID; | |
37 # if ($tmp[2] eq "gene") { | |
38 # $tmp[0]=~s/Chr\./Chr/; | |
39 # $tmp[0]=~s/Chr/chr/; | |
40 # my @infor=split/;/,$tmp[8]; | |
41 # for (my $i=0;$i<@infor ;$i++) { | |
42 # if ($infor[$i]=~/Alias\=(\S+)$/) { | |
43 # $ID=$1; | |
44 # last; | |
45 # } | |
46 # } | |
47 # $gene{$tmp[0]}{$ID}=[$tmp[3],$tmp[4],$tmp[6]];#$gene{chr}{geneID}=[start,end,strand] | |
48 # print OUT "$ID\t$tmp[0]\t$tmp[3]\t$tmp[4]\t$tmp[6]\n"; | |
49 # } | |
50 #} | |
51 close IN; | |
52 #close OUT; | |
53 | |
54 | |
55 my $filein=$opts{'i'}; | |
56 my $fileout=$opts{'o'}; | |
57 | |
58 open IN,"<$filein"; #input file | |
59 open OUT,">$fileout"; #output file | |
60 while (my $aline=<IN>) { | |
61 chomp $aline; | |
62 my @tmp=split/\t/,$aline; | |
63 if($aline=~/^\#/){print OUT "$aline\tP_annotate\n";next} | |
64 my @result; | |
65 #shift @tmp; | |
66 my @id=split/:/,$tmp[0]; | |
67 $id[0]=~s/Chr0/Chr/; | |
68 my @posi=split/-/,$id[1]; | |
69 foreach my $key (keys %{$gene{$id[0]}}) { | |
70 if ($posi[0]<$gene{$id[0]}{$key}[1] && $posi[1]>$gene{$id[0]}{$key}[0]) { | |
71 push @result,"gene-body;$key;$gene{$id[0]}{$key}[2]";#$te{$key}"; | |
72 next; | |
73 } | |
74 #if ($posi[0]<$gene{$id[0]}{$key}[0] && $posi[1]>$gene{$id[0]}{$key}[0]-1000) { | |
75 if ($posi[0]<$gene{$id[0]}{$key}[0] && $posi[1]>$gene{$id[0]}{$key}[0]-$dis) { | |
76 push @result,"up1-kb;$key;$gene{$id[0]}{$key}[2]" if($gene{$id[0]}{$key}[2] eq "+"); | |
77 push @result,"down1-kb;$key;$gene{$id[0]}{$key}[2]" if($gene{$id[0]}{$key}[2] eq "-"); | |
78 next; | |
79 } | |
80 #if ($posi[0]<$gene{$id[0]}{$key}[1]+1000 && $posi[1]>$gene{$id[0]}{$key}[1]) { | |
81 if ($posi[0]<$gene{$id[0]}{$key}[1]+$dis && $posi[1]>$gene{$id[0]}{$key}[1]) { | |
82 push @result,"down1-kb;$key;$gene{$id[0]}{$key}[2]" if($gene{$id[0]}{$key}[2] eq "+"); | |
83 push @result,"up1-kb;$key;$gene{$id[0]}{$key}[2]" if($gene{$id[0]}{$key}[2] eq "-"); | |
84 next; | |
85 } | |
86 } | |
87 my $result; | |
88 if (!(@result)) { | |
89 $result="intergenic"; | |
90 } | |
91 elsif($#result==0){ | |
92 $result=$result[0]; | |
93 | |
94 } | |
95 else{ | |
96 $result=join "\t",@result; | |
97 } | |
98 # else{ | |
99 # my $te_num=0; | |
100 # my @te_overlap; | |
101 # my @te_up_down; | |
102 # my @non_overlap; | |
103 # my @non_up_down; | |
104 # for (my $k=0;$k<@result ;$k++) { | |
105 # my @rr=split/\;/,$result[$k]; | |
106 # if ($rr[3] eq "Y") { | |
107 # $te_num++; | |
108 # if ($rr[0] eq "overlap") { | |
109 # push @te_overlap,$result[$k]; | |
110 # } | |
111 # else{ | |
112 # push @te_up_down,$result[$k]; | |
113 # } | |
114 # } | |
115 # else{ | |
116 # if ($rr[0] eq "overlap") { | |
117 # push @non_overlap,$result[$k]; | |
118 # } | |
119 # else{ | |
120 # push @non_up_down,$result[$k]; | |
121 # } | |
122 # } | |
123 # } | |
124 # if ($te_num==0) {#non TE | |
125 # if (!(@te_overlap)) {#down up | |
126 # if ($#non_up_down==0) { | |
127 # $result=$non_up_down[0]; | |
128 # } | |
129 # else{#overlap | |
130 # my $all_2=join "\t",@non_up_down; | |
131 # $result="up&down1-kb\t".$all_2; | |
132 # } | |
133 # } | |
134 # else{ | |
135 # $result=join "\t",@non_overlap; | |
136 # if ($#non_overlap>=1) { | |
137 # print "$aline\t$result\n"; | |
138 # } | |
139 # } | |
140 # } | |
141 # else{#TE | |
142 # if (!(@te_overlap)) {#down up | |
143 # if ($#te_up_down==0) { | |
144 # $result=$te_up_down[0]; | |
145 # } | |
146 # else{#overlap | |
147 # my $all_2=join "\t",@te_up_down; | |
148 # $result="up&down1-kb\t".$all_2; | |
149 # } | |
150 # } | |
151 # else{ | |
152 # $result=join "\t",@te_overlap; | |
153 # if ($#te_overlap>=1) { | |
154 # print "$aline\t$result\n"; | |
155 # } | |
156 # } | |
157 # } | |
158 # } | |
159 print OUT "$aline\t$result\n"; | |
160 } | |
161 | |
162 close IN; | |
163 close OUT; | |
164 sub usage{ | |
165 print <<"USAGE"; | |
166 Version $version | |
167 Usage: | |
168 $0 -i -o -g -d | |
169 options: | |
170 -i input file | |
171 -g genelist file | |
172 -d int the length of the upstream and downstream,default 1000 | |
173 -o output file | |
174 -h help | |
175 USAGE | |
176 exit(1); | |
177 } | |
178 |