Mercurial > repos > bigrna > gpsrna
diff Annotate.pl @ 0:87fe81de0931 draft default tip
Uploaded
author | bigrna |
---|---|
date | Sun, 04 Jan 2015 02:47:25 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Annotate.pl Sun Jan 04 02:47:25 2015 -0500 @@ -0,0 +1,178 @@ +#!/usr/bin/perl -w +#Filename: +#Author: Chentt +#Email: chentt@big.ac.cn +#Date: 2014/4/10 +#Modified: +#Description: cluster annotate by priority +my $version=1.00; + +use strict; +use Getopt::Long; + +my %opts; +GetOptions(\%opts,"i=s","d=i","g=s","o=s","t=s","h"); +if (!(defined $opts{i} and defined $opts{g} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments +&usage; +} +#my $genelistout=$opts{'t'}; +my $dis=defined $opts{'d'}? $opts{'d'}:1000; +my %gene; + +#open OUT,">$genelistout"; #output file +#print OUT "#ID\tchr\tstart\tend\tstrand\ns"; +open IN,"<$opts{g}"; +while (my $aline=<IN>) { + chomp $aline; + next if($aline=~/^\#/); + my @tmp=split/\t/,$aline;#ID chr start end strand + #push @{$gene1{$tmp[0]}},[$tmp[2],$tmp[3],$tmp[1]]; + $gene{$tmp[1]}{$tmp[0]}=[$tmp[2],$tmp[3],$tmp[4]]; +} +#while (my $aline=<IN>) { +# chomp $aline; +# next if($aline=~/^\#/); +# my @tmp=split/\t/,$aline; +# my $ID; +# if ($tmp[2] eq "gene") { +# $tmp[0]=~s/Chr\./Chr/; +# $tmp[0]=~s/Chr/chr/; +# my @infor=split/;/,$tmp[8]; +# for (my $i=0;$i<@infor ;$i++) { +# if ($infor[$i]=~/Alias\=(\S+)$/) { +# $ID=$1; +# last; +# } +# } +# $gene{$tmp[0]}{$ID}=[$tmp[3],$tmp[4],$tmp[6]];#$gene{chr}{geneID}=[start,end,strand] +# print OUT "$ID\t$tmp[0]\t$tmp[3]\t$tmp[4]\t$tmp[6]\n"; +# } +#} +close IN; +#close OUT; + + +my $filein=$opts{'i'}; +my $fileout=$opts{'o'}; + +open IN,"<$filein"; #input file +open OUT,">$fileout"; #output file +while (my $aline=<IN>) { + chomp $aline; + my @tmp=split/\t/,$aline; + if($aline=~/^\#/){print OUT "$aline\tP_annotate\n";next} + my @result; + #shift @tmp; + my @id=split/:/,$tmp[0]; + $id[0]=~s/Chr0/Chr/; + my @posi=split/-/,$id[1]; + foreach my $key (keys %{$gene{$id[0]}}) { + if ($posi[0]<$gene{$id[0]}{$key}[1] && $posi[1]>$gene{$id[0]}{$key}[0]) { + push @result,"gene-body;$key;$gene{$id[0]}{$key}[2]";#$te{$key}"; + next; + } + #if ($posi[0]<$gene{$id[0]}{$key}[0] && $posi[1]>$gene{$id[0]}{$key}[0]-1000) { + if ($posi[0]<$gene{$id[0]}{$key}[0] && $posi[1]>$gene{$id[0]}{$key}[0]-$dis) { + push @result,"up1-kb;$key;$gene{$id[0]}{$key}[2]" if($gene{$id[0]}{$key}[2] eq "+"); + push @result,"down1-kb;$key;$gene{$id[0]}{$key}[2]" if($gene{$id[0]}{$key}[2] eq "-"); + next; + } + #if ($posi[0]<$gene{$id[0]}{$key}[1]+1000 && $posi[1]>$gene{$id[0]}{$key}[1]) { + if ($posi[0]<$gene{$id[0]}{$key}[1]+$dis && $posi[1]>$gene{$id[0]}{$key}[1]) { + push @result,"down1-kb;$key;$gene{$id[0]}{$key}[2]" if($gene{$id[0]}{$key}[2] eq "+"); + push @result,"up1-kb;$key;$gene{$id[0]}{$key}[2]" if($gene{$id[0]}{$key}[2] eq "-"); + next; + } + } + my $result; + if (!(@result)) { + $result="intergenic"; + } + elsif($#result==0){ + $result=$result[0]; + + } + else{ + $result=join "\t",@result; + } +# else{ +# my $te_num=0; +# my @te_overlap; +# my @te_up_down; +# my @non_overlap; +# my @non_up_down; +# for (my $k=0;$k<@result ;$k++) { +# my @rr=split/\;/,$result[$k]; +# if ($rr[3] eq "Y") { +# $te_num++; +# if ($rr[0] eq "overlap") { +# push @te_overlap,$result[$k]; +# } +# else{ +# push @te_up_down,$result[$k]; +# } +# } +# else{ +# if ($rr[0] eq "overlap") { +# push @non_overlap,$result[$k]; +# } +# else{ +# push @non_up_down,$result[$k]; +# } +# } +# } +# if ($te_num==0) {#non TE +# if (!(@te_overlap)) {#down up +# if ($#non_up_down==0) { +# $result=$non_up_down[0]; +# } +# else{#overlap +# my $all_2=join "\t",@non_up_down; +# $result="up&down1-kb\t".$all_2; +# } +# } +# else{ +# $result=join "\t",@non_overlap; +# if ($#non_overlap>=1) { +# print "$aline\t$result\n"; +# } +# } +# } +# else{#TE +# if (!(@te_overlap)) {#down up +# if ($#te_up_down==0) { +# $result=$te_up_down[0]; +# } +# else{#overlap +# my $all_2=join "\t",@te_up_down; +# $result="up&down1-kb\t".$all_2; +# } +# } +# else{ +# $result=join "\t",@te_overlap; +# if ($#te_overlap>=1) { +# print "$aline\t$result\n"; +# } +# } +# } +# } + print OUT "$aline\t$result\n"; +} + +close IN; +close OUT; +sub usage{ +print <<"USAGE"; +Version $version +Usage: +$0 -i -o -g -d +options: +-i input file +-g genelist file +-d int the length of the upstream and downstream,default 1000 +-o output file +-h help +USAGE +exit(1); +} +