comparison Annotate.pl @ 0:07745c0958dd draft

Uploaded
author big-tiandm
date Thu, 18 Sep 2014 21:40:25 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:07745c0958dd
1 #!/usr/bin/perl -w
2 #Filename:
3 #Author: Chentt
4 #Email: chentt@big.ac.cn
5 #Date: 2014/4/10
6 #Modified:
7 #Description: cluster annotate by priority
8 my $version=1.00;
9
10 use strict;
11 use Getopt::Long;
12
13 my %opts;
14 GetOptions(\%opts,"i=s","d=i","g=s","o=s","t=s","h");
15 if (!(defined $opts{i} and defined $opts{g} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments
16 &usage;
17 }
18 #my $genelistout=$opts{'t'};
19 my $dis=defined $opts{'d'}? $opts{'d'}:1000;
20 my %gene;
21
22 #open OUT,">$genelistout"; #output file
23 #print OUT "#ID\tchr\tstart\tend\tstrand\ns";
24 open IN,"<$opts{g}";
25 while (my $aline=<IN>) {
26 chomp $aline;
27 next if($aline=~/^\#/);
28 my @tmp=split/\t/,$aline;#ID chr start end strand
29 #push @{$gene1{$tmp[0]}},[$tmp[2],$tmp[3],$tmp[1]];
30 $gene{$tmp[1]}{$tmp[0]}=[$tmp[2],$tmp[3],$tmp[4]];
31 }
32 #while (my $aline=<IN>) {
33 # chomp $aline;
34 # next if($aline=~/^\#/);
35 # my @tmp=split/\t/,$aline;
36 # my $ID;
37 # if ($tmp[2] eq "gene") {
38 # $tmp[0]=~s/Chr\./Chr/;
39 # $tmp[0]=~s/Chr/chr/;
40 # my @infor=split/;/,$tmp[8];
41 # for (my $i=0;$i<@infor ;$i++) {
42 # if ($infor[$i]=~/Alias\=(\S+)$/) {
43 # $ID=$1;
44 # last;
45 # }
46 # }
47 # $gene{$tmp[0]}{$ID}=[$tmp[3],$tmp[4],$tmp[6]];#$gene{chr}{geneID}=[start,end,strand]
48 # print OUT "$ID\t$tmp[0]\t$tmp[3]\t$tmp[4]\t$tmp[6]\n";
49 # }
50 #}
51 close IN;
52 #close OUT;
53
54
55 my $filein=$opts{'i'};
56 my $fileout=$opts{'o'};
57
58 open IN,"<$filein"; #input file
59 open OUT,">$fileout"; #output file
60 while (my $aline=<IN>) {
61 chomp $aline;
62 my @tmp=split/\t/,$aline;
63 if($aline=~/^\#/){print OUT "$aline\tP_annotate\n";next}
64 my @result;
65 #shift @tmp;
66 my @id=split/:/,$tmp[0];
67 $id[0]=~s/Chr0/Chr/;
68 my @posi=split/-/,$id[1];
69 foreach my $key (keys %{$gene{$id[0]}}) {
70 if ($posi[0]<$gene{$id[0]}{$key}[1] && $posi[1]>$gene{$id[0]}{$key}[0]) {
71 push @result,"gene-body;$key;$gene{$id[0]}{$key}[2]";#$te{$key}";
72 next;
73 }
74 #if ($posi[0]<$gene{$id[0]}{$key}[0] && $posi[1]>$gene{$id[0]}{$key}[0]-1000) {
75 if ($posi[0]<$gene{$id[0]}{$key}[0] && $posi[1]>$gene{$id[0]}{$key}[0]-$dis) {
76 push @result,"up1-kb;$key;$gene{$id[0]}{$key}[2]" if($gene{$id[0]}{$key}[2] eq "+");
77 push @result,"down1-kb;$key;$gene{$id[0]}{$key}[2]" if($gene{$id[0]}{$key}[2] eq "-");
78 next;
79 }
80 #if ($posi[0]<$gene{$id[0]}{$key}[1]+1000 && $posi[1]>$gene{$id[0]}{$key}[1]) {
81 if ($posi[0]<$gene{$id[0]}{$key}[1]+$dis && $posi[1]>$gene{$id[0]}{$key}[1]) {
82 push @result,"down1-kb;$key;$gene{$id[0]}{$key}[2]" if($gene{$id[0]}{$key}[2] eq "+");
83 push @result,"up1-kb;$key;$gene{$id[0]}{$key}[2]" if($gene{$id[0]}{$key}[2] eq "-");
84 next;
85 }
86 }
87 my $result;
88 if (!(@result)) {
89 $result="intergenic";
90 }
91 elsif($#result==0){
92 $result=$result[0];
93
94 }
95 else{
96 $result=join "\t",@result;
97 }
98 # else{
99 # my $te_num=0;
100 # my @te_overlap;
101 # my @te_up_down;
102 # my @non_overlap;
103 # my @non_up_down;
104 # for (my $k=0;$k<@result ;$k++) {
105 # my @rr=split/\;/,$result[$k];
106 # if ($rr[3] eq "Y") {
107 # $te_num++;
108 # if ($rr[0] eq "overlap") {
109 # push @te_overlap,$result[$k];
110 # }
111 # else{
112 # push @te_up_down,$result[$k];
113 # }
114 # }
115 # else{
116 # if ($rr[0] eq "overlap") {
117 # push @non_overlap,$result[$k];
118 # }
119 # else{
120 # push @non_up_down,$result[$k];
121 # }
122 # }
123 # }
124 # if ($te_num==0) {#non TE
125 # if (!(@te_overlap)) {#down up
126 # if ($#non_up_down==0) {
127 # $result=$non_up_down[0];
128 # }
129 # else{#overlap
130 # my $all_2=join "\t",@non_up_down;
131 # $result="up&down1-kb\t".$all_2;
132 # }
133 # }
134 # else{
135 # $result=join "\t",@non_overlap;
136 # if ($#non_overlap>=1) {
137 # print "$aline\t$result\n";
138 # }
139 # }
140 # }
141 # else{#TE
142 # if (!(@te_overlap)) {#down up
143 # if ($#te_up_down==0) {
144 # $result=$te_up_down[0];
145 # }
146 # else{#overlap
147 # my $all_2=join "\t",@te_up_down;
148 # $result="up&down1-kb\t".$all_2;
149 # }
150 # }
151 # else{
152 # $result=join "\t",@te_overlap;
153 # if ($#te_overlap>=1) {
154 # print "$aline\t$result\n";
155 # }
156 # }
157 # }
158 # }
159 print OUT "$aline\t$result\n";
160 }
161
162 close IN;
163 close OUT;
164 sub usage{
165 print <<"USAGE";
166 Version $version
167 Usage:
168 $0 -i -o -g -d
169 options:
170 -i input file
171 -g genelist file
172 -d int the length of the upstream and downstream,default 1000
173 -o output file
174 -h help
175 USAGE
176 exit(1);
177 }
178