annotate [APliBio]Nebula tools suite/Nebula/AnnotatePeaks/annotatePeaks.pl @ 4:0b8b39c2ce01 draft default tip

Uploaded
author alermine
date Wed, 14 Nov 2012 06:04:04 -0500
parents 2ec3ba0e9e70
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
1 #:t:::::::::::::::::g@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
2 #:t::::::::::::::;@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
3 #:::::::::::::z;@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
4 #::::::::::::i@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
5 #::::::::::::@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@$@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
6 #:::::::::::3@@@@@@@@@@@@@@@@@@@@@@@@@B@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
7 #::::::::::3@@@@@@@@@@@@@@@@@@@@@BEEESSE5EEEEBBM@@@@@@@@@@@@@@@@@@@@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
8 #::::::::::3@@@@@@@@@@@@@@@@@@@@BEEEEEE35EE55E2355E5SBMB@@@@@@@@@@@@@@@@@$
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
9 #::::::::::@@@@@@@@@@@@@@@@@@@EEEE55533t3tttt::::::!!!!7755E755SBBMMM@@@MM
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
10 #::::::::::3@@@@@@@@@@@@@@@@@@EEEE2t3ttttt:::::::::::::::::::::::!7?5225EE
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
11 #::::::::::3@@@@@@@@@@@@@@@@@@EEEEE31t::::::::::::::::::::::::::::::::3E5@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
12 #::::::::::3@@@@@@@@@@@@@@@@@@EEEEEEtt:::::::::::::::::::::::::::::::::353
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
13 #::::::::::3@@@@@@@@@@@@@@@@@@EEEEEE1ttz::::::::::::::::::::::::::::::::35
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
14 #:::::::::::@@@@@@@@@@@@@@@@@@EEEEEEEtz1::::::::::::::::::::::::::::::::t:
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
15 #:::::::::!3@@@@@@@@@@@@@@@@@@@EEEEEttt::::::::::::::::::::::::::::::::;zz
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
16 #::::::::::@@@@@@@@@@@@@@@@@@@@EEEEEttt:::::z;z:::::::::::::::::::::::::13
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
17 #::::::::::3B@@@@@@@@@@@@@@@@@@EEEEEEE3tt:czzztti;:::::::::::::::::::::::3
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
18 #::::ttt::::3@@@@@@@@@@@@@@@@EEEEE5EE25Ezt1EEEz5Etzzz;;;;:::::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
19 #:::::::::::I9@@@@@@@@@@@@@@@@@@@@@@@@@@EEEEEE@@@@@@@@@@@@@@Ez;:::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
20 #:::::::::::::E@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@Ez::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
21 #::::::::::::::E@@@@@@@@@@@@@@@@@@@@@@@@@@@@@BE5EBB@@@@@@@@@@@@@@@EEE:::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
22 #:::::::::::::::@@@@@@@@@@@@@@@@@@@@@@@@@@@@E1::35@@@@@@@@@@ME3MMME2::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
23 #:::::::::::::::?@@@@@@@@@@@@@@@@@@M@@@@@@@EE:::::3SB@@BBESEEt::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
24 #::::::::::::::::J$@@@@@@@B@@@@@@@@@@@@@@@@EE:::::::!35E33t:::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
25 #:::::::::::::::::3@E@@@EE5EESE5EESE@@@@@@@Et::::::::::::tz:::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
26 #:::::::::::::::::J@E$@EEE5133555SE@@@@@@@@Et:::::::::::::::::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
27 #::::::::::::::::::E@E@EEEEtt3523EEE@@@@@@@E::::::::::::::::::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
28 #:t::::::::::::::::JEE3@@@EEEEEEEEEE@@@@@@@E:::::::::t;:::::::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
29 #:t:::::::::::::::::!5ES@EEEEEEEEES@@@@@@@@@E;:::;;;:3Ez::::::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
30 #:t::::::::::::::::::::JE@@EEEEEEE@@@@@@@@@@@@@@@@ME!:::;:::::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
31 #:tz::::::::::::::::::::JE@@@EEEE@@@@@@@@@@@@@@EE!:::::::t::::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
32 #:t::::::::::::::::::::::3@@@@@@@@@@@@@@@@@@ESBE::::::::::::::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
33 #:::::::::::::::::::::::::Q@@@@@@@@@@@@@@@@EE3EE;:::::zzzz::::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
34 #:::::::::::::::::::::::::3@@@@@@@@@@@@@@@@@@@@@@NN@@@@@@Ez:::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
35 #:zt:::::::::::::::::::::::3@@@@EE@@@@@@@@@@EEEEt::;z113E5t:::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
36 #::tt:::::::::::::::::::::::3@@@E@@@@@@@@@@@@@@@@BEt::::::::::::::::t:::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
37 #:tt:t:::::::::::::::::::::::?S@@@@@@@@@@@BBEEE51!::::::::::::::zzzEt:::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
38 #::::::::::::::::::::::::::::::3Q@@@@@@@BEEEEEt:::::::::::::;zz@@@EE::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
39 #::::::::::::::::::::::::::::::::75B@@@@@EEEtt;:::::::::;zz@@@@BEEEtz:::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
40 #::::::::::::::::::::::::::::::::::::?9@@@@@@@@@@@E2Ezg@@@@@B@@@EEEE1t::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
41 #:::::::::::::::::::::::::::::::::::::::3@@@@@@@@@@@@@@@@@@@E@EEEEEEEzzz::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
42 #::::::::::::::::::::::::::::::::::::;@@@@@@@@@@@@@@@@@@@@@@@EEEEEEE5ttttt
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
43 #:::::::::::::::::::::::::::::::;g@@@@@@@@@@@@@@@@@@@@@@@@@@EEEEEEEEEEEtzt
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
44 #::::::::::::::::::::::::::::;@@@@@@@@@@@@@@@@@@@@@@@@@@E@@EEEEEEEEEEEE@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
45 #::::::::::::::::::::::::::g@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@EEEE3EEEE@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
46 #:::::::::::::::::::::;;g@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@EEEt33@@@@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
47 #:::::::::::::::::;g@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@E@@@@@@EEEtg@@@@@@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
48 #::::::::::::::;@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@EEEE@@@@@@@@@@@@@@@@@@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
49 #:::::::::::::@@@@@@@@@@@@@@@@@$@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
50 #::::::::::;@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
51 #
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
52 # Copyleft ↄ⃝ 2012 Institut Curie
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
53 # Author(s): Valentina Boeva, Alban Lermine (Institut Curie) 2012
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
54 # Contact: valentina.boeva@curie.fr, alban.lermine@curie.fr
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
55 # This software is distributed under the terms of the GNU General
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
56 # Public License, either Version 2, June 1991 or Version 3, June 2007.
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
57
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
58 #!/usr/bin/perl -w
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
59
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
60 #for a complete list of genes and a list of sites, sorts genes close to sites
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
61 #printDistance
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
62
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
63 #read only gene files with refSeqGenes
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
64 #counts only once overlapping transcripts or genes
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
65
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
66 #calculates some stats about locations of peaks
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
67
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
68 #uses hierarchie : promoter, imUpstream, intragenic,enh, 5kbdownstream, intergenic + for genes: fExon,exon,fIntron,intron,junction+-50kb ONLY FOR "findClosestGene"
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
69 #outputs fold change of expression is available
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
70
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
71 #outputname - corrected
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
72 #all isoforms are considered, even those that start and end at the same coordinates
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
73 #uses fluorescence values
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
74
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
75 #only one location per gene in mode "all" (but promoter+intragenic if both)
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
76
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
77 #read directly Noresp/down/up genes
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
78
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
79 #prints distance to TE
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
80
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
81 use strict;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
82 use POSIX;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
83
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
84 my $SitesFilename ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
85 my $GenesFilename ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
86 my $SelectedGenesFilename = "";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
87 my $MirFilename = "";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
88 my $minScore = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
89 my $BIGNUMBER = 10000000000;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
90 my $enhLeft = -30000;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
91 my $enhRight = -1500;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
92 my $promLeft = $enhRight ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
93 my $immediateDownstream = 2000;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
94 my $maxScore = $BIGNUMBER ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
95 my $verbose = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
96 my $maxDistance = $BIGNUMBER ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
97 my $downStreamDist = 5000;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
98 my $jonctionSize = 50;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
99 my $all = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
100 my $fluoFile = "";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
101 my $regType = "";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
102 my $GCislands = "";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
103 my $ResFilename = "";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
104
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
105 my $usage = qq{
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
106 $0
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
107
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
108 -----------------------------
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
109 mandatory parameters:
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
110
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
111 -g filename file with all genes
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
112 -tf filename file with sites of TF
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
113
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
114 -----------------------------
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
115 optional parameters:
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
116 -v for verbose
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
117 -mir filename file with positions of miRNA
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
118 -maxDist value maximal Distance to genes (def. Inf)
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
119 -minScore value minimal Score (def. 0)
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
120 -maxScore value maximal Score (def. Inf)
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
121 -selG filename file with selected genes and their expression levels
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
122 -all to output all genes intersecting with the peak
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
123 -fluo filename file with fluorescence
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
124 -regType valeu down-regulated, up-regulated, no-response
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
125 -gc filename file with gc-islands
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
126
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
127 -lp valeu upstream of TSS region to define promoter
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
128 -rp valeu downstream of TSS region to define immediate downstream
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
129 -enh valeu upstream of TSS region to define enhancer
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
130 -dg valeu downstream of transcription end region to define gene downstream
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
131
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
132 -o filename output filename
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
133
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
134 };
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
135
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
136 if(scalar(@ARGV) == 0){
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
137 print $usage;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
138 exit(0);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
139 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
140
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
141
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
142 while(scalar(@ARGV) > 0){
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
143 my $this_arg = shift @ARGV;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
144 if ( $this_arg eq '-h') {print "$usage\n"; exit; }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
145
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
146 elsif ( $this_arg eq '-g') {$GenesFilename = shift @ARGV;}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
147 elsif ( $this_arg eq '-tf') {$SitesFilename = shift @ARGV;}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
148 elsif ( $this_arg eq '-mir') {$MirFilename = shift @ARGV;}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
149 elsif ( $this_arg eq '-selG') {$SelectedGenesFilename = shift @ARGV;}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
150 elsif ( $this_arg eq '-maxDist') {$maxDistance = 1;}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
151 elsif ( $this_arg eq '-maxScore') {$maxScore = shift @ARGV;}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
152 elsif ( $this_arg eq '-minScore') {$minScore = shift @ARGV;}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
153 elsif ( $this_arg eq '-v') {$verbose = 1;}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
154 elsif ( $this_arg eq '-all') {$all = 1;}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
155 elsif ( $this_arg eq '-regType') {$regType = shift @ARGV;}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
156
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
157 elsif ( $this_arg eq '-fluo') {$fluoFile = shift @ARGV;}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
158 elsif ( $this_arg eq '-gc') {$GCislands = shift @ARGV;}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
159 elsif ( $this_arg eq '-o') {$ResFilename = shift @ARGV;}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
160
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
161 elsif ( $this_arg eq '-lp') {$enhRight = shift @ARGV;$promLeft = $enhRight;}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
162 elsif ( $this_arg eq '-rp') {$immediateDownstream = shift @ARGV;}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
163 elsif ( $this_arg eq '-enh') {$enhLeft = shift @ARGV;}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
164 elsif ( $this_arg eq '-dg') {$downStreamDist = shift @ARGV;}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
165
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
166 elsif ( $this_arg =~ m/^-/ ) { print "unknown flag: $this_arg\n";}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
167 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
168
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
169 if ($maxDistance<0) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
170 die "\tThe maximal distance must be positive!\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
171 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
172 my $tmpn = $SitesFilename;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
173 $tmpn =~ s/.*[\/\\]//;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
174 if ($ResFilename eq "") {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
175 $ResFilename = $SelectedGenesFilename.$tmpn."_".$minScore."_dists.txt";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
176
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
177 if ($maxScore != $BIGNUMBER) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
178 $ResFilename = $SelectedGenesFilename.$SitesFilename."_".$minScore."_$maxScore"."_dists.txt";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
179 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
180
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
181 if ($all == 1) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
182 $ResFilename =~ s/_dists/_all_dists/;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
183 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
184 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
185
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
186
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
187 #-----------read selected genes----------------
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
188 my %selectedGenes;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
189 my %selectedGenesFoldChange;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
190 if ( $SelectedGenesFilename ne "") {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
191 open (FILE, "<$SelectedGenesFilename ") or die "Cannot open file $SelectedGenesFilename !!!!: $!";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
192 while (<FILE>) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
193 chomp;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
194 my @a = split/\t/;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
195 $selectedGenes{$a[1]} = $a[3];
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
196 $selectedGenesFoldChange{$a[1]} = $a[2];
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
197 #print "gene:$a[1],reg:$selectedGenes{$a[1]},FC:$selectedGenesFoldChange{$a[1]}\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
198 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
199
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
200 close FILE;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
201 print "\t\t$SelectedGenesFilename is read!\n" if ($verbose);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
202 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
203
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
204 #-----------read genes with fluorescence---------
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
205 my %fluoGenes;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
206 if ( $fluoFile ne "") {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
207 open (FILE, "<$fluoFile ") or die "Cannot open file $fluoFile !!!!: $!";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
208 my $gene = "";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
209 my $med = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
210 my %h;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
211 while (<FILE>) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
212 chomp;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
213 my @a = split/\t/;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
214
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
215 next if (scalar(@a)<5);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
216 next if ($a[0] eq "probesets");
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
217 next unless ($a[0] =~m/\S/);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
218 next unless ($a[4] =~m/\S/);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
219 if ($gene ne "" && $gene ne $a[2]) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
220 #calcMed
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
221 $med = med(keys %h);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
222 $fluoGenes{$gene} = $med;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
223 $med=0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
224 delete @h{keys %h};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
225 } else {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
226 #$h{$a[4]} = 1;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
227 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
228 $gene = $a[2];
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
229 $h{$a[4]} = 1;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
230 #print "keys ", scalar(keys %h),"\t",keys %h,"\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
231 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
232 if ($gene ne "") {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
233 $med = med(keys %h);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
234 $fluoGenes{$gene} = $med;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
235 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
236 close FILE;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
237 print "\t\t$fluoFile is read!\n" if ($verbose);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
238 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
239 #-----------read GC-islands----------------
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
240 my %GCislands;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
241 if ($GCislands ne "") {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
242 open (FILE, "<$GCislands ") or die "Cannot open file $GCislands !!!!: $!";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
243
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
244 while (<FILE>) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
245 chomp;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
246 my @a = split/\t/;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
247 #bin chrom chromStart chromEnd name length cpgNum gcNum perCpg perGc obsExp
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
248 #107 chr1 36568608 36569851 CpG: 128 1243 128 766 20.6 61.6 1.09
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
249 my $chr = $a[1];
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
250 my $start = $a[2];
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
251 my $end = $a[3];
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
252 $GCislands{$chr}->{$start}=$end;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
253 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
254 close FILE;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
255 } elsif ($verbose) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
256 print "you did not specify a file with GC-islands\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
257 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
258
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
259 #-----------read genes----------------
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
260
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
261 my %genes;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
262
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
263 my $count = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
264
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
265 open (GENES, "<$GenesFilename") or die "Cannot open file $GenesFilename!!!!: $!";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
266 <GENES>;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
267 while (<GENES>) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
268 chomp;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
269 if (/(chr.*)\s([+-])\s(\d+)\s(\d+)\s(\d+)\s(\d+)\s(\d+)\s(\S+)\s(\S+)\s\S+\s(\S+)/){
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
270 my $name = $10;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
271 my $chr = $1;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
272 my $strand = $2;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
273 if ($strand eq '+') {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
274 $strand = 1;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
275 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
276 else {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
277 $strand = -1;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
278 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
279 my $leftPos = $3;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
280 my $rightPos = $4;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
281 my $cdsStart= $5;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
282 my $cdsEnd= $6;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
283 my $exonCount= $7;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
284 my $exonStarts= $8;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
285 my $exonEnds= $9;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
286 my $ID = "$name\t$chr:$leftPos-$rightPos;$exonStarts-$exonEnds";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
287 my $foldChange = 1;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
288 my $reg = "NA";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
289 my $fluo = "NA";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
290 if ( $SelectedGenesFilename ne "") {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
291 #print "$name\t";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
292 if (exists($selectedGenes{$name})) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
293 $reg = $selectedGenes{$name};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
294 $foldChange = $selectedGenesFoldChange{$name};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
295 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
296 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
297 unless (exists($genes{$chr})) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
298 my %h;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
299 $genes{$chr} = \%h;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
300 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
301 unless (exists($genes{$chr}->{$ID})) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
302 my %h1;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
303 $genes{$chr}->{$ID} = \%h1;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
304 $count++;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
305 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
306 if ( $fluoFile ne "") {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
307 if (exists($fluoGenes{$name})) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
308 $fluo = $fluoGenes{$name};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
309 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
310 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
311 $genes{$chr}->{$ID}{'name'} = $name ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
312 $genes{$chr}->{$ID}{'left'} = $leftPos ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
313 $genes{$chr}->{$ID}{'right'} = $rightPos ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
314 $genes{$chr}->{$ID}{'cdsStart'} = $cdsStart;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
315 $genes{$chr}->{$ID}{'cdsEnd'} = $cdsEnd;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
316 $genes{$chr}->{$ID}{'strand'} = $strand;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
317 $genes{$chr}->{$ID}{'exonCount'} = $exonCount;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
318 $genes{$chr}->{$ID}{'exonStarts'} = $exonStarts;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
319 $genes{$chr}->{$ID}{'exonEnds'} = $exonEnds;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
320 $genes{$chr}->{$ID}{'TSS'} = ($strand == 1) ? $leftPos :$rightPos ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
321 $genes{$chr}->{$ID}{'TE'} = ($strand == -1) ? $leftPos :$rightPos ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
322 $genes{$chr}->{$ID}{'reg'} = $reg;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
323 $genes{$chr}->{$ID}{'foldChange'} = $foldChange;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
324 $genes{$chr}->{$ID}{'length'} = abs ($leftPos-$rightPos);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
325 ($genes{$chr}->{$ID}{'firstIntronStart'},$genes{$chr}->{$ID}{'firstIntronEnd'}) = getFirstIntron ($exonCount,$exonStarts,$exonEnds,$strand);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
326 ($genes{$chr}->{$ID}{'firstExonStart'},$genes{$chr}->{$ID}{'firstExonEnd'}) = getFirstExon ($exonCount,$exonStarts,$exonEnds,$strand);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
327
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
328 $genes{$chr}->{$ID}{'fluo'} = $fluo;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
329
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
330 $genes{$chr}->{$ID}{'GCisland'} = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
331 if ($GCislands ne "") {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
332 $genes{$chr}->{$ID}{'GCisland'} = checkIfGC ($genes{$chr}->{$ID}{'TSS'},$strand,2000,$GCislands{$chr});
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
333 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
334
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
335 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
336 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
337 print "Total genes : $count\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
338 close GENES;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
339 print "\t\t$GenesFilename is read!\n" if ($verbose);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
340 #for my $gName (sort keys %{$genes{'chr18'}}) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
341
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
342 # print "$gName\t$genes{'chr18'}->{$gName}{'TSS'}\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
343 #}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
344
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
345 #-----------read file with sites miRNA, store as genes-----
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
346
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
347 if ( $MirFilename eq ""){
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
348 print "you did not specify file with miRNA\n" if ($verbose);;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
349 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
350 else {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
351
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
352
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
353 open (MIR, "<$MirFilename ") or die "Cannot open file $MirFilename !!!!: $!";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
354 #chr1 20669090 20669163 mmu-mir-206 960 +
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
355 while (<MIR>) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
356 chomp;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
357 my ($name, $chr, $leftPos, $rightPos, $strand );
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
358 my $fluo = "NA";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
359
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
360 #1 . miRNA 20669091 20669163 . + . ACC="MI0000249"; ID="mmu-mir-206";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
361 if (/([0-9XYM]+)\s.\smiRNA\s(\d+)\s(\d+)\s.\s([+-])\s.\sACC=.*ID=\"(.*)\"/) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
362 $name = $5;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
363 $chr = $1;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
364 $leftPos = $2;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
365 $rightPos = $3;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
366 $strand = $4;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
367 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
368 elsif (/(.*)\s(\d+)\s(\d+)\s(.*)\s(.*)\s(.*)/){
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
369 $name = $4;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
370 $chr = $1;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
371 $leftPos = $2;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
372 $rightPos = $3;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
373 $strand = $6;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
374 } else {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
375 next;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
376 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
377
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
378 unless ($chr =~ m/chr/) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
379 $chr = "chr".$chr;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
380 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
381 my $ID = "$name\t$chr:$leftPos-$rightPos";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
382
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
383 if ($strand eq '+') {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
384 $strand = 1;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
385 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
386 else {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
387 $strand = -1;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
388 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
389
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
390 unless (exists($genes{$chr})) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
391 my %h;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
392 $genes{$chr} = \%h;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
393 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
394 unless (exists($genes{$chr}->{$ID})) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
395 my %h1;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
396 $genes{$chr}->{$ID} = \%h1;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
397 $count++;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
398 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
399 $genes{$chr}->{$ID}{'name'} = $name ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
400 $genes{$chr}->{$ID}{'left'} = $leftPos ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
401 $genes{$chr}->{$ID}{'right'} = $rightPos ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
402 $genes{$chr}->{$ID}{'cdsStart'} = $leftPos ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
403 $genes{$chr}->{$ID}{'cdsEnd'} = $rightPos ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
404 $genes{$chr}->{$ID}{'strand'} = $strand;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
405 $genes{$chr}->{$ID}{'length'} = abs ($leftPos-$rightPos);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
406 $genes{$chr}->{$ID}{'exonCount'} = 1;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
407 $genes{$chr}->{$ID}{'exonStarts'} = $leftPos ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
408 $genes{$chr}->{$ID}{'exonEnds'} = $rightPos ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
409 $genes{$chr}->{$ID}{'TSS'} = ($strand == 1) ? $leftPos :$rightPos ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
410 $genes{$chr}->{$ID}{'TE'} = ($strand == -1) ? $leftPos :$rightPos ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
411 $genes{$chr}->{$ID}{'reg'} = "miRNA";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
412 $genes{$chr}->{$ID}{'foldChange'} = 1;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
413 ($genes{$chr}->{$ID}{'firstIntronStart'},$genes{$chr}->{$ID}{'firstIntronEnd'}) = (0,0);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
414 ($genes{$chr}->{$ID}{'firstExonStart'},$genes{$chr}->{$ID}{'firstExonEnd'}) = (0,0);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
415
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
416 $genes{$chr}->{$ID}{'fluo'} = $fluo;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
417 $genes{$chr}->{$ID}{'GCisland'} = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
418
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
419
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
420
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
421 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
422
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
423
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
424 close MIR;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
425 print "\t\t$MirFilename is read!\n" if ($verbose) ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
426 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
427 #-----------read file with sites and find N closest genes-----
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
428 my $numberOfAllSites = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
429
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
430 open (FILE, "<$SitesFilename") or die "Cannot open file $SitesFilename!!!!: $!";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
431 open (OUT , ">$ResFilename") or die "Cannot open file $ResFilename!!!!: $!";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
432 print OUT "Chromosome\tStart\tEnd\tMax\tScore\tDistTSS\tType\tTypeIntra\tReg\tFoldChange\tDistTE\tGeneName\tGeneCoordinates\n" ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
433 my $header = <FILE>; #read header
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
434 my @a = split (/\t/,$header );
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
435 my $correction = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
436 if ($a[1] =~m/chr/) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
437 $correction=1;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
438 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
439
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
440 unless ($header=~m/Chromosome/ || $header=~m/track/|| $header=~m/Start/i) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
441 close FILE;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
442 open (FILE, "<$SitesFilename") or die "Cannot open file $SitesFilename!!!!: $!";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
443 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
444
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
445 $count = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
446
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
447 my %typehash;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
448 my %typehashIntra;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
449
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
450 $typehashIntra{"f_exon"} = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
451 $typehashIntra{"f_intron"} = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
452 $typehashIntra{"jonction"} = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
453 $typehashIntra{"exon"} = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
454 $typehashIntra{"intron"} = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
455
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
456
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
457 while (<FILE>) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
458 chomp;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
459 $numberOfAllSites++;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
460 my @a = split /\t/;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
461 my $chro = $a[0+$correction];
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
462 my $fpos = $a[1+$correction];
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
463 my $lpos = $a[2+$correction];
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
464 my $maxpos = $a[3+$correction];
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
465 if ($maxpos =~ m/\D/) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
466 $maxpos = int(($lpos+$fpos)/2);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
467 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
468 my $score = $a[4+$correction];
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
469 next if ($score < $minScore);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
470 next if ($score >= $maxScore);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
471 $score = $score +1 -1;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
472 #print "$score" ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
473
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
474 #my $site = "$chro:$fpos-$lpos\t$maxpos\t$score";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
475 #print $site ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
476 #my $motifNumber = $a[3];
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
477
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
478 #my $geneSet = &search_genes($N,$chro,($fpos+$lpos)/2,\%genes);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
479 my @b;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
480 unless ($all) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
481 @b = &findClosestGene($fpos,$lpos,$score,$chro,$maxpos,\%genes); #findAllClosestGene for all genes overlaping a peak
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
482 } else {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
483 @b = &findAllClosestGeneOneLocation ($fpos,$lpos,$score,$chro,$maxpos,\%genes); #findAllClosestGene
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
484 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
485 #print "$distTSS\t$distTE\n" unless ($distTSS == $BIGNUMBER);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
486 for my $type (@b) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
487 $typehash{$type} = 0 unless (exists($typehash{$type}));
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
488 $typehash{$type}++;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
489 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
490 $count ++;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
491
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
492 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
493 close FILE;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
494 close OUT ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
495
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
496 print "Total Sites: $count\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
497 my $nEntries = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
498 for my $type (sort keys %typehash) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
499 $nEntries += $typehash{$type};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
500 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
501 print "Type\tCount\tFrequency\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
502 for my $type (sort keys %typehash) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
503 print $type,"\t",$typehash{$type},"\t",$typehash{$type}/$nEntries,"\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
504 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
505 for my $type (sort keys %typehashIntra) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
506 print $type,"\t",$typehashIntra{$type},"\t",$typehashIntra{$type}/$nEntries,"\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
507 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
508
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
509
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
510 #my @arr = @{$genes{'chr'}};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
511
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
512 sub findClosestGene {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
513 my ($fpos,$lpos,$score,$chro,$pos,$genes) = @_;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
514 my ($distTSS,$distTE,$type,$reg,$geneName,$foldChange) = ($BIGNUMBER,$BIGNUMBER,"intergenic",0,0,0);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
515 my $locDistTSS = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
516 my $locDistTE = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
517 my %hashForOverlapingGenes;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
518 my @array2return;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
519 my $typeIntra = "NA";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
520 for my $gName (keys %{$genes->{$chro}}) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
521 my $TSS = $genes->{$chro}{$gName}{'TSS'};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
522 my $strand = $genes->{$chro}{$gName}{'strand'};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
523 my $TE = $genes->{$chro}{$gName}{'TE'};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
524
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
525 $locDistTE = ($pos-$TE)*$strand;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
526 $locDistTSS = ($pos-$TSS)*$strand;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
527 #print "$pos-$genes->{$chro}{$gName}{'TSS'})*$genes->{$chro}{$gName}{'strand'} = $locDistTSS\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
528 #my $locDistTE = ($pos-$genes->{$chro}{$gName}{'TE'})*$genes->{$chro}{$gName}{'strand'};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
529
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
530 #print "$gName $maxDistance $pos-$genes->{$chro}{$gName}{'TSS'} $genes->{$chro}{$gName}{'strand'}\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
531 #print "$locDistTSS $enhLeft $enhRight\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
532
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
533 if (($locDistTSS>= $enhLeft)&&($locDistTSS<$enhRight)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
534 $type = "enhancer";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
535 } elsif (($locDistTSS>= $enhRight)&&($locDistTSS<=0)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
536 $type = "promoter";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
537 } elsif (($locDistTSS> 0)&&($locDistTSS<=$immediateDownstream)&&($locDistTE<=0)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
538 $type = "immediateDownstream";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
539 } elsif (($locDistTSS >= $immediateDownstream)&&($locDistTE<=0)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
540 $type = "intragenic";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
541 } elsif (($locDistTE >= 0)&&($locDistTE<=$downStreamDist)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
542 $type = "5kbDownstream";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
543 } elsif (abs($locDistTSS)<abs($distTSS)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
544 $distTSS = $locDistTSS;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
545 $distTE = $locDistTE;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
546 ($reg,$geneName,$foldChange) = ($genes->{$chro}{$gName}{'reg'},$gName,$genes->{$chro}{$gName}{'foldChange'});
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
547 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
548
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
549 if (($type ne "intergenic")&&($type ne "NA")) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
550 unless (exists ($hashForOverlapingGenes{$type})) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
551 my %h;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
552 $hashForOverlapingGenes{$type} = \%h;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
553 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
554 $hashForOverlapingGenes{$type}->{$locDistTSS}=$gName;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
555
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
556 #print OUT "$chro\t$fpos\t$lpos\t$pos\t$score\t$locDistTSS\t$type\t$genes->{$chro}{$gName}{'reg'}\t$gName\n" ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
557 $type = "NA";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
558 #unless ($gName =~ m/$TSS/) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
559 # print "$gName\t$TSS\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
560 #}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
561 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
562
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
563 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
564 if ($type eq "intergenic") {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
565 print OUT "$chro\t$fpos\t$lpos\t$pos\t$score\t$distTSS\t$type\t$typeIntra\t$reg\t$foldChange\t$distTE\t$geneName\n" ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
566 #print "$chro\t$fpos\t$lpos\t$pos\t$score\t$distTSS\t$type\t$reg\t$geneName\n" ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
567 push(@array2return,$type);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
568
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
569 } else {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
570 my $selectedType;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
571 if (exists ($hashForOverlapingGenes{"promoter"})) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
572 $selectedType = "promoter";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
573 } elsif (exists ($hashForOverlapingGenes{"immediateDownstream"})) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
574 $selectedType = "immediateDownstream";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
575 } elsif (exists ($hashForOverlapingGenes{"intragenic"})) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
576 $selectedType = "intragenic";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
577 } elsif (exists ($hashForOverlapingGenes{"enhancer"})) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
578 $selectedType = "enhancer";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
579 } elsif (exists ($hashForOverlapingGenes{"5kbDownstream"})) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
580 $selectedType = "5kbDownstream";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
581 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
582 my $bestGene = printBest($hashForOverlapingGenes{$selectedType},$chro,$fpos,$lpos,$pos,$score,$genes,$selectedType); #print "$type\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
583 push(@array2return,$selectedType);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
584 my %otherGenes;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
585 for my $type ("promoter","immediateDownstream","intragenic","enhancer","5kbDownstream") { #"firstIntron","exon","intron",
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
586 for my $locDistTSS (sort {$a<=>$b} keys %{$hashForOverlapingGenes{$type}}) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
587 my $otherGene = $hashForOverlapingGenes{$type}->{$locDistTSS};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
588 next if ($genes->{$chro}{$bestGene}{'name'} eq $genes->{$chro}{$otherGene}{'name'});
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
589 next if (exists ($otherGenes{$genes->{$chro}{$otherGene}{'name'}}));
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
590 if (overlapGenes($otherGene,$bestGene,$chro,$genes)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
591
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
592 if (($type eq "intragenic")||($type eq "immediateDownstream")) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
593 $typeIntra = &getTypeIntra($genes->{$chro}{$otherGene},$pos);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
594 $typehashIntra{$typeIntra}++;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
595 }else {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
596 $typeIntra = "NA"; }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
597 print OUT "$chro\t$fpos\t$lpos\t$pos\t$score\t$locDistTSS\t$type\t$typeIntra\t$genes->{$chro}{$otherGene}{'reg'}\t$genes->{$chro}{$otherGene}{'foldChange'}\t",($pos-$genes->{$chro}{$otherGene}{'TE'})*$genes->{$chro}{$otherGene}{'strand'},"\t$otherGene\n" ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
598 push(@array2return,$type);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
599
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
600 #print "$chro\t$fpos\t$lpos\t$pos\t$score\t$locDistTSS\t$type\t$genes->{$chro}{$otherGene}{'reg'}\t$otherGene\n" ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
601 #print $type,"\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
602 $otherGenes{$genes->{$chro}{$otherGene}{'name'}} = 1;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
603 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
604 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
605 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
606
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
607 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
608 return @array2return;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
609 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
610
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
611 sub findAllClosestGeneOneLocation {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
612 my ($fpos,$lpos,$score,$chro,$pos,$genes) = @_;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
613 my ($distTSS,$type,$reg,$geneName,$foldChange) = ($BIGNUMBER,"intergenic",0,0,0);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
614 my $locDistTSS = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
615 my $locDistTE = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
616 my %hashForOverlapingGenes;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
617 my @array2return;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
618 my $typeIntra = "NA";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
619 for my $gName (keys %{$genes->{$chro}}) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
620 my $TSS = $genes->{$chro}{$gName}{'TSS'};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
621 my $strand = $genes->{$chro}{$gName}{'strand'};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
622 my $TE = $genes->{$chro}{$gName}{'TE'};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
623
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
624 $locDistTE = ($pos-$TE)*$strand;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
625 $locDistTSS = ($pos-$TSS)*$strand;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
626 #print "$pos-$genes->{$chro}{$gName}{'TSS'})*$genes->{$chro}{$gName}{'strand'} = $locDistTSS\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
627 #my $locDistTE = ($pos-$genes->{$chro}{$gName}{'TE'})*$genes->{$chro}{$gName}{'strand'};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
628
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
629 #print "$gName $maxDistance $pos-$genes->{$chro}{$gName}{'TSS'} $genes->{$chro}{$gName}{'strand'}\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
630 #print "$locDistTSS $enhLeft $enhRight\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
631
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
632 if (($locDistTSS>= $enhLeft)&&($locDistTSS<$enhRight)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
633 $type = "enhancer";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
634 } elsif (($locDistTSS>= $enhRight)&&($locDistTSS<=0)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
635 $type = "promoter";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
636 } elsif (($locDistTSS> 0)&&($locDistTSS<=$immediateDownstream)&&($locDistTE<=0)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
637 $type = "immediateDownstream";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
638 } elsif (($locDistTSS >= $immediateDownstream)&&($locDistTE<=0)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
639 $type = "intragenic";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
640 } elsif (($locDistTE >= 0)&&($locDistTE<=$downStreamDist)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
641 $type = "5kbDownstream";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
642 } elsif (abs($locDistTSS)<abs($distTSS)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
643 $distTSS = $locDistTSS;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
644 ($reg,$geneName,$foldChange) = ($genes->{$chro}{$gName}{'reg'},$gName,$genes->{$chro}{$gName}{'foldChange'});
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
645 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
646
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
647 if (($type ne "intergenic")&&($type ne "NA")) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
648 unless (exists ($hashForOverlapingGenes{$type})) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
649 my %h;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
650 $hashForOverlapingGenes{$type} = \%h;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
651 $hashForOverlapingGenes{$type}->{$gName}=$locDistTSS;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
652
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
653 if (($type eq "intragenic")||($type eq "immediateDownstream")) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
654 $typeIntra = &getTypeIntra($genes->{$chro}{$gName},$pos);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
655 $typehashIntra{$typeIntra}++;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
656 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
657 print OUT "$chro\t$fpos\t$lpos\t$pos\t$score\t$locDistTSS\t$type\t$typeIntra\t$genes->{$chro}{$gName}{'reg'}\t$genes->{$chro}{$gName}{'fluo'}\t$genes->{$chro}{$gName}{'foldChange'}\t$genes{$chro}->{$gName}{'GCisland'}\t",($pos-$genes->{$chro}{$gName}{'TE'})*$genes->{$chro}{$gName}{'strand'},"\t$gName\n" ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
658 push(@array2return,$type);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
659 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
660 $typeIntra = "NA";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
661 $type = "NA";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
662 #unless ($gName =~ m/$TSS/) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
663 # print "$gName\t$TSS\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
664 #}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
665 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
666
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
667 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
668 if ($type eq "intergenic") {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
669 print OUT "$chro\t$fpos\t$lpos\t$pos\t$score\t$locDistTSS\t$type\t$typeIntra\t$genes->{$chro}{$geneName}{'reg'}\t$genes->{$chro}{$geneName}{'fluo'}\t$genes->{$chro}{$geneName}{'foldChange'}\t$genes{$chro}->{$geneName}{'GCisland'}\t",($pos-$genes->{$chro}{$geneName}{'TE'})*$genes->{$chro}{$geneName}{'strand'},"\t$geneName\n" ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
670
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
671 #print "$chro\t$fpos\t$lpos\t$pos\t$score\t$distTSS\t$type\t$reg\t$geneName\n" ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
672 push(@array2return,$type);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
673
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
674 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
675 return @array2return;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
676 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
677
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
678 sub findAllClosestGene {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
679 my ($fpos,$lpos,$score,$chro,$pos,$genes) = @_;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
680 my ($distTSS,$type,$reg,$geneName,$foldChange) = ($BIGNUMBER,"intergenic",0,0,0);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
681 my $locDistTSS = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
682 my $locDistTE = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
683 my %hashForOverlapingGenes;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
684 my @array2return;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
685 my $typeIntra = "NA";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
686 for my $gName (keys %{$genes->{$chro}}) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
687 my $TSS = $genes->{$chro}{$gName}{'TSS'};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
688 my $strand = $genes->{$chro}{$gName}{'strand'};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
689 my $TE = $genes->{$chro}{$gName}{'TE'};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
690
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
691 $locDistTE = ($pos-$TE)*$strand;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
692 $locDistTSS = ($pos-$TSS)*$strand;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
693 #print "$pos-$genes->{$chro}{$gName}{'TSS'})*$genes->{$chro}{$gName}{'strand'} = $locDistTSS\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
694 #my $locDistTE = ($pos-$genes->{$chro}{$gName}{'TE'})*$genes->{$chro}{$gName}{'strand'};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
695
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
696 #print "$gName $maxDistance $pos-$genes->{$chro}{$gName}{'TSS'} $genes->{$chro}{$gName}{'strand'}\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
697 #print "$locDistTSS $enhLeft $enhRight\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
698
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
699 if (($locDistTSS>= $enhLeft)&&($locDistTSS<$enhRight)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
700 $type = "enhancer";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
701 } elsif (($locDistTSS>= $enhRight)&&($locDistTSS<=0)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
702 $type = "promoter";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
703 } elsif (($locDistTSS> 0)&&($locDistTSS<=$immediateDownstream)&&($locDistTE<=0)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
704 $type = "immediateDownstream";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
705 } elsif (($locDistTSS >= $immediateDownstream)&&($locDistTE<=0)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
706 $type = "intragenic";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
707 } elsif (($locDistTE >= 0)&&($locDistTE<=$downStreamDist)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
708 $type = "5kbDownstream";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
709 } elsif (abs($locDistTSS)<abs($distTSS)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
710 $distTSS = $locDistTSS;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
711 ($reg,$geneName,$foldChange) = ($genes->{$chro}{$gName}{'reg'},$gName,$genes->{$chro}{$gName}{'foldChange'});
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
712 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
713
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
714 if (($type ne "intergenic")&&($type ne "NA")) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
715 unless (exists ($hashForOverlapingGenes{$type})) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
716 my %h;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
717 $hashForOverlapingGenes{$type} = \%h;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
718 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
719 #$hashForOverlapingGenes{$type}->{$gName}=$locDistTSS;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
720
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
721 $typeIntra = "NA";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
722 if (($type eq "intragenic")||($type eq "immediateDownstream")) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
723 $typeIntra = &getTypeIntra($genes->{$chro}{$gName},$pos);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
724 $typehashIntra{$typeIntra}++;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
725 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
726 print OUT "$chro\t$fpos\t$lpos\t$pos\t$score\t$locDistTSS\t$type\t$typeIntra\t$genes->{$chro}{$gName}{'reg'}\t$genes->{$chro}{$gName}{'fluo'}\t$genes->{$chro}{$gName}{'foldChange'}\t$genes{$chro}->{$gName}{'GCisland'}\t",($pos-$genes->{$chro}{$gName}{'TE'})*$genes->{$chro}{$gName}{'strand'},"\t$gName\n" ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
727 push(@array2return,$type);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
728
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
729 $type = "NA";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
730 #unless ($gName =~ m/$TSS/) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
731 # print "$gName\t$TSS\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
732 #}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
733 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
734
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
735 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
736 if ($type eq "intergenic") {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
737 print OUT "$chro\t$fpos\t$lpos\t$pos\t$score\t$locDistTSS\t$type\t$typeIntra\t$genes->{$chro}{$geneName}{'reg'}\t$genes->{$chro}{$geneName}{'fluo'}\t$genes->{$chro}{$geneName}{'foldChange'}\t$genes{$chro}->{$geneName}{'GCisland'}\t",($pos-$genes->{$chro}{$geneName}{'TE'})*$genes->{$chro}{$geneName}{'strand'},"\t$geneName\n" ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
738
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
739 #print "$chro\t$fpos\t$lpos\t$pos\t$score\t$distTSS\t$type\t$reg\t$geneName\n" ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
740 push(@array2return,$type);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
741
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
742 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
743 return @array2return;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
744 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
745
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
746 sub getTypeIntra {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
747
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
748 my ($geneEntry, $pos) = @_;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
749 my $type;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
750
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
751 if (($pos >= $geneEntry->{'firstIntronStart'})&&($pos <=$geneEntry->{'firstIntronEnd'})) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
752 return "f_intron";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
753 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
754 if (($pos >= $geneEntry->{'firstExonStart'})&&($pos <=$geneEntry->{'firstExonEnd'})) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
755 return "f_exon";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
756 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
757 $type = getIntronExon ($pos, $geneEntry->{'exonCount'},$geneEntry->{'exonStarts'},$geneEntry->{'exonEnds'},$geneEntry->{'strand'});
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
758 return $type;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
759 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
760
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
761 sub overlapGenes {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
762 my ($otherGene,$bestGene,$chro,$genes)= @_;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
763 my $a1 = $genes->{$chro}{$otherGene}{'left'};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
764 my $a2 = $genes->{$chro}{$bestGene}{'left'};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
765 my $e1 = $genes->{$chro}{$otherGene}{'right'};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
766 my $e2 = $genes->{$chro}{$bestGene}{'right'};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
767 if (($a1 >= $a2)&&($a1 <= $e2)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
768 return 1;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
769 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
770 if (($a2 >= $a1)&&($a2 <= $e1)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
771 return 1;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
772 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
773 return 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
774 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
775
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
776 sub printBest {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
777 my ($hash,$chro,$fpos,$lpos,$pos,$score,$genes,$type) = @_;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
778 for my $locDistTSS (sort {$a<=>$b} keys %{$hash}) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
779 my $gName = $hash->{$locDistTSS};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
780 my $typeIntra = "NA";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
781 if (($type eq "intragenic")||($type eq "immediateDownstream")) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
782 $typeIntra = &getTypeIntra($genes->{$chro}{$gName},$pos);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
783 $typehashIntra{$typeIntra}++;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
784 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
785 print OUT "$chro\t$fpos\t$lpos\t$pos\t$score\t$locDistTSS\t$type\t$typeIntra\t$genes->{$chro}{$gName}{'reg'}\t$genes->{$chro}{$gName}{'foldChange'}\t",($pos-$genes->{$chro}{$gName}{'TE'})*$genes->{$chro}{$gName}{'strand'},"\t$gName\n" ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
786 return $gName;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
787 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
788 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
789
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
790 #sub findAllClosestGene {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
791 # my ($fpos,$lpos,$score,$chro,$pos,$genes) = @_;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
792 # my ($distTSS,$type,$reg,$geneName,$foldChange) = ($BIGNUMBER,"intergenic",0,0,0);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
793 # my $locDistTSS = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
794 # my $locDistTE = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
795 # my @array2return;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
796 # for my $gName (keys %{$genes->{$chro}}) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
797 # my $TSS = $genes->{$chro}{$gName}{'TSS'};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
798 # my $strand = $genes->{$chro}{$gName}{'strand'};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
799 # my $TE = $genes->{$chro}{$gName}{'TE'};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
800 # $locDistTSS = ($pos-$TSS)*$strand;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
801 # $locDistTE = ($pos-$TE)*$strand;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
802 # #print "$pos-$genes->{$chro}{$gName}{'TSS'})*$genes->{$chro}{$gName}{'strand'} = $locDistTSS\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
803 # #my $locDistTE = ($pos-$genes->{$chro}{$gName}{'TE'})*$genes->{$chro}{$gName}{'strand'};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
804 #
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
805 # #print "$gName $maxDistance $pos-$genes->{$chro}{$gName}{'TSS'} $genes->{$chro}{$gName}{'strand'}\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
806 # #print "$locDistTSS $enhLeft $enhRight\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
807 #
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
808 # if (($locDistTSS>= $enhLeft)&&($locDistTSS<$enhRight)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
809 # $type = "enhancer";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
810 # } elsif (($locDistTSS>= $enhRight)&&($locDistTSS<=0)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
811 # $type = "promoter";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
812 # } elsif (($locDistTSS> 0)&&($locDistTSS<=$immediateDownstream)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
813 # $type = "immediateDownstream";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
814 # } elsif (($pos >= $genes{$chro}->{$gName}{'firstIntronStart'})&&($pos <= $genes{$chro}->{$gName}{'firstIntronEnd'})) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
815 # $type = "firstIntron";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
816 # } elsif (($locDistTSS> $immediateDownstream)&&($locDistTSS<=$genes{$chro}->{$gName}{'length'})) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
817 # #$type = "intragenic";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
818 # $type = &getIntronExon ($pos, $genes{$chro}->{$gName}{'exonCount'},$genes{$chro}->{$gName}{'exonStarts'},$genes{$chro}->{$gName}{'exonEnds'},$genes{$chro}->{$gName}{'strand'});
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
819 # } elsif (($locDistTE >= 0)&&($locDistTE<=$downStreamDist)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
820 # $type = "5kbDownstream";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
821 # } elsif (abs($locDistTSS)<abs($distTSS)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
822 # $distTSS = $locDistTSS;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
823 # ($reg,$geneName,$foldChange) = ($genes->{$chro}{$gName}{'reg'},$gName,$genes->{$chro}{$gName}{'foldChange'});
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
824 # }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
825 # if (($type ne "intergenic")&&($type ne "NA")) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
826 # print OUT "$chro\t$fpos\t$lpos\t$pos\t$score\t$locDistTSS\t$type\t$genes->{$chro}{$gName}{'reg'}\t$genes->{$chro}{$gName}{'foldChange'}\t$gName\n" ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
827 # push(@array2return,$type);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
828 # $type = "NA";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
829 # #unless ($gName =~ m/$TSS/) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
830 # # print "$gName\t$TSS\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
831 # #}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
832 # }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
833 #
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
834 # }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
835 # if ($type eq "intergenic") {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
836 # print OUT "$chro\t$fpos\t$lpos\t$pos\t$score\t$distTSS\t$type\t$reg\t$foldChange\t$geneName\n" ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
837 # push(@array2return,$type);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
838 # #print "$chro\t$fpos\t$lpos\t$pos\t$score\t$distTSS\t$type\t$reg\t$geneName\n" ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
839 #
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
840 # }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
841 # return @array2return;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
842 #}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
843
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
844 sub getFirstIntron {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
845 my ($exonCount,$exonStarts,$exonEnds,$strand) = @_;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
846 my ($left,$right);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
847 if ($exonCount == 1) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
848 return (0,0);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
849 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
850 if ($strand == 1) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
851 $left = (split ",", $exonEnds)[0]+$jonctionSize;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
852 $right = (split (",", $exonStarts))[1]-$jonctionSize;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
853 } else {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
854 $left = (split (",", $exonEnds))[$exonCount-2]+$jonctionSize;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
855 $right = (split (",", $exonStarts))[$exonCount-1]-$jonctionSize;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
856 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
857 ($left,$right);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
858 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
859
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
860 sub getFirstExon {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
861 my ($exonCount,$exonStarts,$exonEnds,$strand) = @_;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
862 my ($left,$right);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
863 if ($exonCount == 1) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
864 return (0,0);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
865 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
866 if ($strand == 1) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
867 $left = (split ",", $exonStarts)[0];
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
868 $right = (split (",", $exonEnds))[0]-$jonctionSize;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
869 } else {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
870 $left = (split (",", $exonStarts))[$exonCount-1]+$jonctionSize;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
871 $right = (split (",", $exonEnds))[$exonCount-1];
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
872 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
873 ($left,$right);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
874 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
875
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
876 sub getIntronExon {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
877 my ($pos,$exonCount,$exonStarts,$exonEnds,$strand) = @_;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
878 my (@left,@right);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
879 @left = (split ",", $exonStarts);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
880 @right = (split (",", $exonEnds));
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
881
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
882 for (my $i = 0; $i<$exonCount;$i++) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
883 #print "$left[$i] <= $pos ? $pos <= $right[$i]\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
884 if (($left[$i]+$jonctionSize < $pos) && ($pos < $right[$i]-$jonctionSize)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
885 #print "URA!\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
886 return "exon";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
887 } elsif (($i+1<$exonCount)&&($right[$i]+$jonctionSize < $pos) && ($pos < $left[$i+1]-$jonctionSize)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
888 return "intron";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
889 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
890 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
891 return "jonction";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
892 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
893
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
894 sub min {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
895 return $_[0] if ($_[0]<$_[1]);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
896 $_[1];
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
897 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
898 sub med {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
899 my @arr = @_;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
900 my $med = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
901 @arr = sort {$a <=> $b} @arr;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
902 if ((scalar(@arr)/2) =~ m/[\.\,]5/) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
903 return $arr[floor(scalar(@arr)/2)];
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
904 } else {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
905 return ($arr[scalar(@arr)/2]+$arr[scalar(@arr)/2-1])/2;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
906 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
907 $med;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
908 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
909
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
910 sub checkIfGC {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
911 my ($TSS,$strand,$dist,$GCislandsChr)=@_;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
912 my $ifGC = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
913 my $leftProm=$TSS-$dist;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
914 my $rightProm = $TSS;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
915 if ($strand== -1) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
916 my $leftProm=$TSS;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
917 my $rightProm = $TSS+$dist;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
918 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
919 for my $leftGC (keys %{$GCislandsChr}) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
920 my $rightGC = $GCislandsChr->{$leftGC};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
921 if ($leftGC>=$leftProm&&$leftGC<=$rightProm || $rightGC>=$leftProm&&$rightGC<=$rightProm) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
922 return "GC-island";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
923 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
924 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
925 return $ifGC ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
926 }