50
|
1 #!/usr/bin/perl -w
|
|
2 #Filename:
|
|
3 #Author: Chentt
|
|
4 #Email: chentt@big.ac.cn
|
|
5 #Date: 2014/4/10
|
|
6 #Modified:
|
|
7 #Description: cluster annotate by priority
|
|
8 my $version=1.00;
|
|
9
|
|
10 use strict;
|
|
11 use Getopt::Long;
|
|
12
|
|
13 my %opts;
|
|
14 GetOptions(\%opts,"i=s","d=i","g=s","o=s","t=s","h");
|
|
15 if (!(defined $opts{i} and defined $opts{g} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments
|
|
16 &usage;
|
|
17 }
|
|
18 #my $genelistout=$opts{'t'};
|
|
19 my $dis=defined $opts{'d'}? $opts{'d'}:1000;
|
|
20 my %gene;
|
|
21
|
|
22 #open OUT,">$genelistout"; #output file
|
|
23 #print OUT "#ID\tchr\tstart\tend\tstrand\ns";
|
|
24 open IN,"<$opts{g}";
|
|
25 while (my $aline=<IN>) {
|
|
26 chomp $aline;
|
|
27 next if($aline=~/^\#/);
|
|
28 my @tmp=split/\t/,$aline;#ID chr start end strand
|
|
29 #push @{$gene1{$tmp[0]}},[$tmp[2],$tmp[3],$tmp[1]];
|
|
30 $gene{$tmp[1]}{$tmp[0]}=[$tmp[2],$tmp[3],$tmp[4]];
|
|
31 }
|
|
32 #while (my $aline=<IN>) {
|
|
33 # chomp $aline;
|
|
34 # next if($aline=~/^\#/);
|
|
35 # my @tmp=split/\t/,$aline;
|
|
36 # my $ID;
|
|
37 # if ($tmp[2] eq "gene") {
|
|
38 # $tmp[0]=~s/Chr\./Chr/;
|
|
39 # $tmp[0]=~s/Chr/chr/;
|
|
40 # my @infor=split/;/,$tmp[8];
|
|
41 # for (my $i=0;$i<@infor ;$i++) {
|
|
42 # if ($infor[$i]=~/Alias\=(\S+)$/) {
|
|
43 # $ID=$1;
|
|
44 # last;
|
|
45 # }
|
|
46 # }
|
|
47 # $gene{$tmp[0]}{$ID}=[$tmp[3],$tmp[4],$tmp[6]];#$gene{chr}{geneID}=[start,end,strand]
|
|
48 # print OUT "$ID\t$tmp[0]\t$tmp[3]\t$tmp[4]\t$tmp[6]\n";
|
|
49 # }
|
|
50 #}
|
|
51 close IN;
|
|
52 #close OUT;
|
|
53
|
|
54
|
|
55 my $filein=$opts{'i'};
|
|
56 my $fileout=$opts{'o'};
|
|
57
|
|
58 open IN,"<$filein"; #input file
|
|
59 open OUT,">$fileout"; #output file
|
|
60 while (my $aline=<IN>) {
|
|
61 chomp $aline;
|
|
62 my @tmp=split/\t/,$aline;
|
|
63 if($aline=~/^\#/){print OUT "$aline\tP_annotate\n";next}
|
|
64 my @result;
|
|
65 #shift @tmp;
|
|
66 my @id=split/:/,$tmp[0];
|
|
67 $id[0]=~s/Chr0/Chr/;
|
|
68 my @posi=split/-/,$id[1];
|
|
69 foreach my $key (keys %{$gene{$id[0]}}) {
|
|
70 if ($posi[0]<$gene{$id[0]}{$key}[1] && $posi[1]>$gene{$id[0]}{$key}[0]) {
|
|
71 push @result,"gene-body;$key;$gene{$id[0]}{$key}[2]";#$te{$key}";
|
|
72 next;
|
|
73 }
|
|
74 #if ($posi[0]<$gene{$id[0]}{$key}[0] && $posi[1]>$gene{$id[0]}{$key}[0]-1000) {
|
|
75 if ($posi[0]<$gene{$id[0]}{$key}[0] && $posi[1]>$gene{$id[0]}{$key}[0]-$dis) {
|
|
76 push @result,"up1-kb;$key;$gene{$id[0]}{$key}[2]" if($gene{$id[0]}{$key}[2] eq "+");
|
|
77 push @result,"down1-kb;$key;$gene{$id[0]}{$key}[2]" if($gene{$id[0]}{$key}[2] eq "-");
|
|
78 next;
|
|
79 }
|
|
80 #if ($posi[0]<$gene{$id[0]}{$key}[1]+1000 && $posi[1]>$gene{$id[0]}{$key}[1]) {
|
|
81 if ($posi[0]<$gene{$id[0]}{$key}[1]+$dis && $posi[1]>$gene{$id[0]}{$key}[1]) {
|
|
82 push @result,"down1-kb;$key;$gene{$id[0]}{$key}[2]" if($gene{$id[0]}{$key}[2] eq "+");
|
|
83 push @result,"up1-kb;$key;$gene{$id[0]}{$key}[2]" if($gene{$id[0]}{$key}[2] eq "-");
|
|
84 next;
|
|
85 }
|
|
86 }
|
|
87 my $result;
|
|
88 if (!(@result)) {
|
|
89 $result="intergenic";
|
|
90 }
|
|
91 elsif($#result==0){
|
|
92 $result=$result[0];
|
|
93
|
|
94 }
|
|
95 else{
|
|
96 $result=join "\t",@result;
|
|
97 }
|
|
98 # else{
|
|
99 # my $te_num=0;
|
|
100 # my @te_overlap;
|
|
101 # my @te_up_down;
|
|
102 # my @non_overlap;
|
|
103 # my @non_up_down;
|
|
104 # for (my $k=0;$k<@result ;$k++) {
|
|
105 # my @rr=split/\;/,$result[$k];
|
|
106 # if ($rr[3] eq "Y") {
|
|
107 # $te_num++;
|
|
108 # if ($rr[0] eq "overlap") {
|
|
109 # push @te_overlap,$result[$k];
|
|
110 # }
|
|
111 # else{
|
|
112 # push @te_up_down,$result[$k];
|
|
113 # }
|
|
114 # }
|
|
115 # else{
|
|
116 # if ($rr[0] eq "overlap") {
|
|
117 # push @non_overlap,$result[$k];
|
|
118 # }
|
|
119 # else{
|
|
120 # push @non_up_down,$result[$k];
|
|
121 # }
|
|
122 # }
|
|
123 # }
|
|
124 # if ($te_num==0) {#non TE
|
|
125 # if (!(@te_overlap)) {#down up
|
|
126 # if ($#non_up_down==0) {
|
|
127 # $result=$non_up_down[0];
|
|
128 # }
|
|
129 # else{#overlap
|
|
130 # my $all_2=join "\t",@non_up_down;
|
|
131 # $result="up&down1-kb\t".$all_2;
|
|
132 # }
|
|
133 # }
|
|
134 # else{
|
|
135 # $result=join "\t",@non_overlap;
|
|
136 # if ($#non_overlap>=1) {
|
|
137 # print "$aline\t$result\n";
|
|
138 # }
|
|
139 # }
|
|
140 # }
|
|
141 # else{#TE
|
|
142 # if (!(@te_overlap)) {#down up
|
|
143 # if ($#te_up_down==0) {
|
|
144 # $result=$te_up_down[0];
|
|
145 # }
|
|
146 # else{#overlap
|
|
147 # my $all_2=join "\t",@te_up_down;
|
|
148 # $result="up&down1-kb\t".$all_2;
|
|
149 # }
|
|
150 # }
|
|
151 # else{
|
|
152 # $result=join "\t",@te_overlap;
|
|
153 # if ($#te_overlap>=1) {
|
|
154 # print "$aline\t$result\n";
|
|
155 # }
|
|
156 # }
|
|
157 # }
|
|
158 # }
|
|
159 print OUT "$aline\t$result\n";
|
|
160 }
|
|
161
|
|
162 close IN;
|
|
163 close OUT;
|
|
164 sub usage{
|
|
165 print <<"USAGE";
|
|
166 Version $version
|
|
167 Usage:
|
|
168 $0 -i -o -g -d
|
|
169 options:
|
|
170 -i input file
|
|
171 -g genelist file
|
|
172 -d int the length of the upstream and downstream,default 1000
|
|
173 -o output file
|
|
174 -h help
|
|
175 USAGE
|
|
176 exit(1);
|
|
177 }
|
|
178
|