annotate [APliBio]Nebula tools suite/Nebula/AnnotateGenes/geneAnnotation.pl @ 4:0b8b39c2ce01 draft default tip

Uploaded
author alermine
date Wed, 14 Nov 2012 06:04:04 -0500
parents 2ec3ba0e9e70
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
1 #:::::::::::::::::::::::::g@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@j::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
2 #::::::::::::::::::::::;g@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@E:::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
3 #:ttt:::::::::::::::::@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@p@;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
4 #:t:::::::::::::::::g@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
5 #:t::::::::::::::;@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
6 #:::::::::::::z;@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
7 #::::::::::::i@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
8 #::::::::::::@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@$@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
9 #:::::::::::3@@@@@@@@@@@@@@@@@@@@@@@@@B@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
10 #::::::::::3@@@@@@@@@@@@@@@@@@@@@BEEESSE5EEEEBBM@@@@@@@@@@@@@@@@@@@@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
11 #::::::::::3@@@@@@@@@@@@@@@@@@@@BEEEEEE35EE55E2355E5SBMB@@@@@@@@@@@@@@@@@$
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
12 #::::::::::@@@@@@@@@@@@@@@@@@@EEEE55533t3tttt::::::!!!!7755E755SBBMMM@@@MM
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
13 #::::::::::3@@@@@@@@@@@@@@@@@@EEEE2t3ttttt:::::::::::::::::::::::!7?5225EE
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
14 #::::::::::3@@@@@@@@@@@@@@@@@@EEEEE31t::::::::::::::::::::::::::::::::3E5@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
15 #::::::::::3@@@@@@@@@@@@@@@@@@EEEEEEtt:::::::::::::::::::::::::::::::::353
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
16 #::::::::::3@@@@@@@@@@@@@@@@@@EEEEEE1ttz::::::::::::::::::::::::::::::::35
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
17 #:::::::::::@@@@@@@@@@@@@@@@@@EEEEEEEtz1::::::::::::::::::::::::::::::::t:
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
18 #:::::::::!3@@@@@@@@@@@@@@@@@@@EEEEEttt::::::::::::::::::::::::::::::::;zz
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
19 #::::::::::@@@@@@@@@@@@@@@@@@@@EEEEEttt:::::z;z:::::::::::::::::::::::::13
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
20 #::::::::::3B@@@@@@@@@@@@@@@@@@EEEEEEE3tt:czzztti;:::::::::::::::::::::::3
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
21 #::::ttt::::3@@@@@@@@@@@@@@@@EEEEE5EE25Ezt1EEEz5Etzzz;;;;:::::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
22 #:::::::::::I9@@@@@@@@@@@@@@@@@@@@@@@@@@EEEEEE@@@@@@@@@@@@@@Ez;:::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
23 #:::::::::::::E@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@Ez::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
24 #::::::::::::::E@@@@@@@@@@@@@@@@@@@@@@@@@@@@@BE5EBB@@@@@@@@@@@@@@@EEE:::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
25 #:::::::::::::::@@@@@@@@@@@@@@@@@@@@@@@@@@@@E1::35@@@@@@@@@@ME3MMME2::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
26 #:::::::::::::::?@@@@@@@@@@@@@@@@@@M@@@@@@@EE:::::3SB@@BBESEEt::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
27 #::::::::::::::::J$@@@@@@@B@@@@@@@@@@@@@@@@EE:::::::!35E33t:::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
28 #:::::::::::::::::3@E@@@EE5EESE5EESE@@@@@@@Et::::::::::::tz:::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
29 #:::::::::::::::::J@E$@EEE5133555SE@@@@@@@@Et:::::::::::::::::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
30 #::::::::::::::::::E@E@EEEEtt3523EEE@@@@@@@E::::::::::::::::::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
31 #:t::::::::::::::::JEE3@@@EEEEEEEEEE@@@@@@@E:::::::::t;:::::::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
32 #:t:::::::::::::::::!5ES@EEEEEEEEES@@@@@@@@@E;:::;;;:3Ez::::::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
33 #:t::::::::::::::::::::JE@@EEEEEEE@@@@@@@@@@@@@@@@ME!:::;:::::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
34 #:tz::::::::::::::::::::JE@@@EEEE@@@@@@@@@@@@@@EE!:::::::t::::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
35 #:t::::::::::::::::::::::3@@@@@@@@@@@@@@@@@@ESBE::::::::::::::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
36 #:::::::::::::::::::::::::Q@@@@@@@@@@@@@@@@EE3EE;:::::zzzz::::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
37 #:::::::::::::::::::::::::3@@@@@@@@@@@@@@@@@@@@@@NN@@@@@@Ez:::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
38 #:zt:::::::::::::::::::::::3@@@@EE@@@@@@@@@@EEEEt::;z113E5t:::::::::::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
39 #::tt:::::::::::::::::::::::3@@@E@@@@@@@@@@@@@@@@BEt::::::::::::::::t:::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
40 #:tt:t:::::::::::::::::::::::?S@@@@@@@@@@@BBEEE51!::::::::::::::zzzEt:::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
41 #::::::::::::::::::::::::::::::3Q@@@@@@@BEEEEEt:::::::::::::;zz@@@EE::::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
42 #::::::::::::::::::::::::::::::::75B@@@@@EEEtt;:::::::::;zz@@@@BEEEtz:::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
43 #::::::::::::::::::::::::::::::::::::?9@@@@@@@@@@@E2Ezg@@@@@B@@@EEEE1t::::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
44 #:::::::::::::::::::::::::::::::::::::::3@@@@@@@@@@@@@@@@@@@E@EEEEEEEzzz::
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
45 #::::::::::::::::::::::::::::::::::::;@@@@@@@@@@@@@@@@@@@@@@@EEEEEEE5ttttt
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
46 #:::::::::::::::::::::::::::::::;g@@@@@@@@@@@@@@@@@@@@@@@@@@EEEEEEEEEEEtzt
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
47 #::::::::::::::::::::::::::::;@@@@@@@@@@@@@@@@@@@@@@@@@@E@@EEEEEEEEEEEE@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
48 #::::::::::::::::::::::::::g@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@EEEE3EEEE@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
49 #:::::::::::::::::::::;;g@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@EEEt33@@@@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
50 #:::::::::::::::::;g@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@E@@@@@@EEEtg@@@@@@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
51 #::::::::::::::;@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@EEEE@@@@@@@@@@@@@@@@@@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
52 #:::::::::::::@@@@@@@@@@@@@@@@@$@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
53 #::::::::::;@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
54 #
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
55 # Copyleft ↄ⃝ 2012 Institut Curie
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
56 # Author(s): Valentina Boeva, Alban Lermine (Institut Curie) 2012
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
57 # Contact: valentina.boeva@curie.fr, alban.lermine@curie.fr
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
58 # This software is distributed under the terms of the GNU General
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
59 # Public License, either Version 2, June 1991 or Version 3, June 2007.
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
60
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
61 #!/usr/bin/perl -w
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
62 #outputs statistics for all genes in the list
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
63
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
64 #different boundaries
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
65 #no motif p-value for binding sites
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
66 #read directly No-resp/Up/down category
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
67
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
68 #all isoforms from the file with genes
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
69
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
70 #RNApolII sites on junctions
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
71
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
72 use strict;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
73 use POSIX;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
74
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
# Help text printed for -h and when the script is started without arguments.
# $0 interpolates to the name the script was invoked under.
my $usage = qq{
    $0

    -----------------------------
    mandatory parameters:

    -g      filename    file with all genes
    -rp     filename    file with sites of RNApolII
    -k36    filename    file with sites of K36me3
    -tf     filename    file with sites of TF1

    -i      filename    file with a table where to add columns
    -add    values      which columns to add

    -----------------------------
    optional parameters:

    -h                  print this help message and exit
    -o      filename    output filename (default "genes.output.txt")
    -v                  verbose mode
    -mir    filename    file with positions of miRNA
    -k9     filename    file with sites of K9me3

    -c_rp   value       cutoff for -rp
    -c_k9   value       cutoff for -k9
    -c_k36  value       cutoff for -k36

    -selG   filename    selected genes (up-down-regulated)
    -fluo   filename    file with fluorescence
    -gc     filename    file with gc-islands

    -long               for each gene take the longest isoform


};

# Without any argument there is nothing to process: show the help and stop.
if (scalar(@ARGV) == 0) {
    print $usage;
    exit(0);
}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
114
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
## mandatory arguments

# Input file names. The empty string means "not given on the command line";
# the checks further down compare against "" to decide what was supplied.
my ($RNApolFilename, $H3K36Me3polFilename, $H3K9Me3polFilename) = ("") x 3;
my ($TF1Filename, $TF2Filename)                                 = ("") x 2;
my ($GenesFilename, $MirFilename)                               = ("") x 2;
my ($TF1FilenameALL, $TF2FilenameALL)                           = ("") x 2;
my ($SelectedGenesFilename, $fluoFile)                          = ("") x 2;
my ($initialTable, $colomnesToAdd)                              = ("") x 2;

# Distance thresholds. NOTE(review): presumably base-pair offsets relative to
# the gene (negative = upstream) -- confirm against the code that uses them.
# The command line can override four of them:
#   -enh => $enhLeft, -lp => $enhRight, -rightp => $immediateDownstream,
#   -dg  => $kb5.
my ($enhLeft, $longEnhLeft, $enhRight) = (-30000, -60000, -1500);
my $immediateDownstream = 2000;
my ($K9dist, $kb5)      = (5000, 5000);
my $INFINITY            = 10_000_000_000;
my $jonctionSize        = 50;

## optional arguments
my $outname   = "genes.output.txt";
my $verbose   = 0;    # set to 1 by -v
my $GCislands = "";   # file name given with -gc

my $longest = 0;      # set to 1 by -long

#my $cutoff_tf1 = 0;
#my $cutoff_tf2 = 0;
my ($cutoff_tf1All, $cutoff_tf2All)      = (0, 0);
my ($cutoff_rp, $cutoff_k9, $cutoff_k36) = (0, 0, 0);   # -c_rp, -c_k9, -c_k36
my $ifTFcoord = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
154
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
## parse command line arguments

# The lookup tables live in a bare block so they do not add names to the
# file scope.
{
    # Flags that consume the next argument, mapped to the variable they fill.
    my %takes_value = (
        '-selG'   => \$SelectedGenesFilename,

        '-g'      => \$GenesFilename,
        '-rp'     => \$RNApolFilename,
        '-k36'    => \$H3K36Me3polFilename,
        '-k9'     => \$H3K9Me3polFilename,

        '-tf'     => \$TF1Filename,

        '-o'      => \$outname,
        '-mir'    => \$MirFilename,

        '-c_rp'   => \$cutoff_rp,
        '-c_k9'   => \$cutoff_k9,
        '-c_k36'  => \$cutoff_k36,

        '-fluo'   => \$fluoFile,
        '-gc'     => \$GCislands,

        '-i'      => \$initialTable,
        '-add'    => \$colomnesToAdd,

        '-lp'     => \$enhRight,
        '-rightp' => \$immediateDownstream,
        '-enh'    => \$enhLeft,
        '-dg'     => \$kb5,
    );

    # Flags without an argument; seeing one sets the variable to 1.
    my %is_switch = (
        '-v'    => \$verbose,
        '-long' => \$longest,
    );

    while (@ARGV) {
        my $flag = shift @ARGV;

        if ($flag eq '-h') {
            print "$usage\n";
            exit;
        }

        if (exists $takes_value{$flag}) {
            # The value is taken as-is; a flag at the very end of the
            # command line therefore stores undef.
            ${ $takes_value{$flag} } = shift @ARGV;
            next;
        }

        if (exists $is_switch{$flag}) {
            ${ $is_switch{$flag} } = 1;
            next;
        }

        # Anything else that looks like a flag is reported but not fatal;
        # stray non-flag words are ignored silently.
        print "unknown flag: $flag\n" if $flag =~ m/^-/;
    }
}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
195
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
196
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
# A gene file is always required.
die "you should specify a file with genes \n" if $GenesFilename eq "";

# At least one of the four peak files (RNApolII, K36me3, TF1, K9me3) must
# have been given.
die "you should specify at least one file with peaks\n"
    unless grep { $_ ne "" }
        ($RNApolFilename, $H3K36Me3polFilename, $TF1Filename, $H3K9Me3polFilename);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
203
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
204
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
#-----------read selected genes----------------
# Both hashes are keyed by the 2nd tab-separated column of the -selG file,
# which is later looked up by gene name:
#   %selectedGenes           -> 4th column (stored as the gene's 'reg' value)
#   %selectedGenesFoldChange -> 3rd column (stored as the gene's 'foldChange')
# They stay empty when no -selG file was given.
my %selectedGenes;
my %selectedGenesFoldChange;
if ($SelectedGenesFilename ne "") {
    # Three-argument open with a lexical handle: the file name is used
    # literally and can no longer be interpreted as an open mode or a pipe.
    open(my $selected_fh, '<', $SelectedGenesFilename)
        or die "Cannot open file $SelectedGenesFilename !!!!: $!";
    while (my $line = <$selected_fh>) {
        chomp $line;
        my @fields = split /\t/, $line;

        # A line without a gene column (blank or single-field) would only
        # create a bogus entry under the empty key.
        next if @fields < 2;

        my $gene = $fields[1];
        $selectedGenes{$gene}           = $fields[3];
        $selectedGenesFoldChange{$gene} = $fields[2];
        #print "gene:$gene,reg:$selectedGenes{$gene},FC:$selectedGenesFoldChange{$gene}\n";
    }

    close $selected_fh;
    print "\t\t$SelectedGenesFilename is read!\n" if ($verbose);
}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
221
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
#-----------read genes with fluorescence---------
# %fluoGenes maps a gene name (3rd tab-separated column of the -fluo file)
# to the result of med() over the 5th-column values seen for that gene.
# NOTE(review): med() is defined elsewhere in this file -- presumably a
# median; confirm.
my %fluoGenes;
if ($fluoFile ne "") {
    # Three-argument open with a lexical handle: the file name is used
    # literally and can no longer be interpreted as an open mode or a pipe.
    open(my $fluo_fh, '<', $fluoFile)
        or die "Cannot open file $fluoFile !!!!: $!";

    my $gene = "";   # gene whose rows are currently being collected
    my %values;      # 5th-column values of $gene; stored as hash keys, so
                     # repeated values are counted once

    while (my $line = <$fluo_fh>) {
        chomp $line;
        my @fields = split /\t/, $line;

        next if @fields < 5;
        next if $fields[0] eq "probesets";   # presumably the header row
        next unless $fields[0] =~ m/\S/;
        next unless $fields[4] =~ m/\S/;

        # A change of gene name closes the previous gene, so rows of one
        # gene must be adjacent in the file.
        if ($gene ne "" && $gene ne $fields[2]) {
            $fluoGenes{$gene} = med(keys %values);
            %values = ();
        }

        $gene = $fields[2];
        $values{ $fields[4] } = 1;
        #print "keys ", scalar(keys %values),"\t",keys %values,"\n";
    }

    # Flush the last gene of the file.
    if ($gene ne "") {
        $fluoGenes{$gene} = med(keys %values);
    }

    close $fluo_fh;
    print "\t\t$fluoFile is read!\n" if ($verbose);
}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
#-----------read GC-islands----------------
# %GCislands: chromosome -> { island start => island end }, filled from the
# file named by the scalar $GCislands (-gc). The scalar and the hash are two
# different variables that share a name.
my %GCislands;
if ($GCislands ne "") {
    # Three-argument open with a lexical handle: the file name is used
    # literally and can no longer be interpreted as an open mode or a pipe.
    open(my $gc_fh, '<', $GCislands)
        or die "Cannot open file $GCislands !!!!: $!";

    while (my $line = <$gc_fh>) {
        chomp $line;
        #bin chrom chromStart chromEnd name length cpgNum gcNum perCpg perGc obsExp
        #107 chr1 36568608 36569851 CpG: 128 1243 128 766 20.6 61.6 1.09
        my (undef, $chr, $start, $end) = split /\t/, $line;

        # Lines with fewer than four columns carry no interval.
        next unless defined $end;

        $GCislands{$chr}->{$start} = $end;
    }
    close $gc_fh;

    print "$GCislands is read\n" if $verbose;
}
elsif ($verbose) {
    print "you did not specify a file with GC-islands\n";
}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
279
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
280 #-----------read genes----------------
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
281
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
282 my %genes;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
283
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
284 my $count = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
285
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
286 open (GENES, "<$GenesFilename") or die "Cannot open file $GenesFilename!!!!: $!";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
287 <GENES>;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
288 while (<GENES>) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
289 chomp;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
290 if (/(chr.*)\s([+-])\s(\d+)\s(\d+)\s(\d+)\s(\d+)\s(\d+)\s(\S+)\s(\S+)\s\S+\s(\S+)/){
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
291 my $name = $10;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
292 my $chr = $1;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
293 my $strand = $2;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
294 if ($strand eq '+') {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
295 $strand = 1;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
296 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
297 else {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
298 $strand = -1;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
299 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
300 my $leftPos = $3;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
301 my $rightPos = $4;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
302 my $cdsStart= $5;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
303 my $cdsEnd= $6;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
304 my $exonCount= $7;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
305 my $exonStarts= $8;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
306 my $exonEnds= $9;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
307 my $ID = "$name\t$chr:$leftPos-$rightPos\t$count";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
308 my $foldChange = 1;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
309 my $reg = "NA";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
310 my $fluo = "NA";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
311 if ( $SelectedGenesFilename ne "") {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
312 #print "$name\t";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
313 if (exists($selectedGenes{$name})) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
314 $reg = $selectedGenes{$name};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
315 $foldChange = $selectedGenesFoldChange{$name};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
316 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
317 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
318 if ( $fluoFile ne "") {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
319 if (exists($fluoGenes{$name})) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
320 $fluo = $fluoGenes{$name};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
321 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
322 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
323 unless (exists($genes{$chr})) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
324 my %h;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
325 $genes{$chr} = \%h;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
326 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
327
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
328 my $RNAlength = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
329 my $skip = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
330
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
331 #print "$ID\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
332 if($longest) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
333 $RNAlength = getRNAlength($exonStarts,$exonEnds);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
334 for my $IDgene (keys %{$genes{$chr}}) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
335 my $nameGene= (split('\t', $IDgene))[0];
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
336 if ($nameGene eq $name && $RNAlength > $genes{$chr}->{$IDgene}{'RNAlength'}) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
337 #print "found longer isofome: $ID longer than $IDgene\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
338 # print "$RNAlength > ".$genes{$chr}->{$IDgene}{'RNAlength'}."\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
339
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
340 $ID=$IDgene;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
341 } elsif ($nameGene eq $name && $RNAlength <= $genes{$chr}->{$IDgene}{'RNAlength'}) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
342 #print "found shorter isofome: $ID shorted than $IDgene\nwill skip it\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
343 #print "$RNAlength <= ".$genes{$chr}->{$IDgene}{'RNAlength'}."\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
344 $skip = 1;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
345 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
346 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
347 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
348
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
349
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
350 unless ($skip) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
351
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
352 unless (exists($genes{$chr}->{$ID})) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
353 my %h1;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
354 $genes{$chr}->{$ID} = \%h1;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
355 $count++;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
356 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
357
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
358 $genes{$chr}->{$ID}{'name'} = $name ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
359 $genes{$chr}->{$ID}{'left'} = $leftPos ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
360 $genes{$chr}->{$ID}{'right'} = $rightPos ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
361 $genes{$chr}->{$ID}{'cdsStart'} = $cdsStart;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
362 $genes{$chr}->{$ID}{'cdsEnd'} = $cdsEnd;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
363 $genes{$chr}->{$ID}{'strand'} = $strand;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
364 $genes{$chr}->{$ID}{'exonCount'} = $exonCount;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
365 $genes{$chr}->{$ID}{'exonStarts'} = $exonStarts;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
366 $genes{$chr}->{$ID}{'exonEnds'} = $exonEnds;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
367 $genes{$chr}->{$ID}{'TSS'} = ($strand == 1) ? $leftPos :$rightPos ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
368 $genes{$chr}->{$ID}{'TE'} = ($strand == -1) ? $leftPos :$rightPos ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
369 $genes{$chr}->{$ID}{'reg'} = $reg;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
370 $genes{$chr}->{$ID}{'foldChange'} = $foldChange;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
371 $genes{$chr}->{$ID}{'length'} = abs ($leftPos-$rightPos);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
372 $genes{$chr}->{$ID}{'RNAlength'} = $RNAlength ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
373
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
374 $genes{$chr}->{$ID}{'fluo'} = $fluo;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
375
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
376 $genes{$chr}->{$ID}{'RNApolScore'} = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
377 $genes{$chr}->{$ID}{'RNApolDist'} = $INFINITY;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
378 $genes{$chr}->{$ID}{'RNApol_junctionScore'} = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
379 $genes{$chr}->{$ID}{'RNApol_junctionDist'} = $INFINITY;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
380
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
381 $genes{$chr}->{$ID}{'K36score'} = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
382 $genes{$chr}->{$ID}{'K9promScore'} = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
383 $genes{$chr}->{$ID}{'K9promDist'} = $INFINITY;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
384 $genes{$chr}->{$ID}{'K9largeScore'} = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
385 $genes{$chr}->{$ID}{'K9largeDist'} = $INFINITY;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
386 $genes{$chr}->{$ID}{'TFpromScore'} = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
387 $genes{$chr}->{$ID}{'TFpromDist'} = $INFINITY;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
388 $genes{$chr}->{$ID}{'TFenhScore'} = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
389 $genes{$chr}->{$ID}{'TFenhDist'} = $INFINITY;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
390 $genes{$chr}->{$ID}{'TFintraScore'} = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
391 $genes{$chr}->{$ID}{'TFintraDist'} = $INFINITY;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
392 $genes{$chr}->{$ID}{'TFallScore'} = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
393 $genes{$chr}->{$ID}{'TFallDist'} = $INFINITY;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
394
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
395
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
396 $genes{$chr}->{$ID}{'TFFirstIntronAndIntraScore'} = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
397 $genes{$chr}->{$ID}{'TFFirstIntronAndIntraDist'} = $INFINITY;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
398 $genes{$chr}->{$ID}{'TFFirstIntronScore'} = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
399 $genes{$chr}->{$ID}{'TFFirstIntronDist'} = $INFINITY;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
400 $genes{$chr}->{$ID}{'TFintraMinusFirstIntronScore'} = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
401 $genes{$chr}->{$ID}{'TFintraMinusFirstIntronDist'} = $INFINITY;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
402 $genes{$chr}->{$ID}{'TFImmDownScore'} = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
403 $genes{$chr}->{$ID}{'TFImmDownDist'} = $INFINITY;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
404 $genes{$chr}->{$ID}{'TFpromSimpleScore'} = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
405 $genes{$chr}->{$ID}{'TFpromSimpleDist'} = $INFINITY;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
406 $genes{$chr}->{$ID}{'TFenh60kbScore'} = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
407 $genes{$chr}->{$ID}{'TFenh60kbDist'} = $INFINITY;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
408
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
409 $genes{$chr}->{$ID}{'TF_FirstExonScore'} = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
410 $genes{$chr}->{$ID}{'TF_FirstExonDist'} = $INFINITY;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
411 $genes{$chr}->{$ID}{'TF_junctionScore'} = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
412 $genes{$chr}->{$ID}{'TF_junctionDist'} = $INFINITY;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
413
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
414 $genes{$chr}->{$ID}{'TF_junctionAndIntraScore'} = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
415 $genes{$chr}->{$ID}{'TF_junctionAndIntraDist'} = $INFINITY;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
416 $genes{$chr}->{$ID}{'TF_FirstExonAndIntraScore'} = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
417 $genes{$chr}->{$ID}{'TF_FirstExonAndIntraDist'} = $INFINITY;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
418
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
419 $genes{$chr}->{$ID}{'TF_OtherExonsScore'} = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
420 $genes{$chr}->{$ID}{'TF_OtherExonsDist'} = $INFINITY;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
421 $genes{$chr}->{$ID}{'TF_OtherExonsAndIntraScore'} = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
422 $genes{$chr}->{$ID}{'TF_OtherExonsAndIntraDist'} = $INFINITY;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
423
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
424
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
425 $genes{$chr}->{$ID}{'TF_OtherIntronsScore'} = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
426 $genes{$chr}->{$ID}{'TF_OtherIntronsDist'} = $INFINITY;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
427 $genes{$chr}->{$ID}{'TF_OtherIntronsAndIntraScore'} = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
428 $genes{$chr}->{$ID}{'TF_OtherIntronsAndIntraDist'} = $INFINITY;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
429
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
430 $genes{$chr}->{$ID}{'TF5kbDownScore'} = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
431 $genes{$chr}->{$ID}{'TF5kbDownDist'} = $INFINITY;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
432 $genes{$chr}->{$ID}{'K9enhScore'} = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
433 $genes{$chr}->{$ID}{'K9enhDist'} = $INFINITY;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
434
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
435 ($genes{$chr}->{$ID}{'firstIntronStart'},$genes{$chr}->{$ID}{'firstIntronEnd'}) = getFirstIntron ($exonCount,$exonStarts,$exonEnds,$strand);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
436 ($genes{$chr}->{$ID}{'firstExonStart'},$genes{$chr}->{$ID}{'firstExonEnd'}) = getFirstExon ($exonCount,$exonStarts,$exonEnds,$strand);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
437
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
438 $genes{$chr}->{$ID}{'exonCount'} = $exonCount;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
439 $genes{$chr}->{$ID}{'exonStarts'} = $exonStarts;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
440 $genes{$chr}->{$ID}{'exonEnds'} = $exonEnds;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
441
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
442
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
443 $genes{$chr}->{$ID}{'GCisland'} = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
444 if ($GCislands ne "") {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
445 $genes{$chr}->{$ID}{'GCisland'} = checkIfGC ($genes{$chr}->{$ID}{'TSS'},$strand,2000,$GCislands{$chr});
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
446 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
447 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
448 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
449 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
450
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
451 print "Total genes (including isoforms) : $count\n" ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
452 close GENES;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
453 print "\t\t$GenesFilename is read!\n" if ($verbose);;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
454 #for my $gName (sort keys %{$genes{'chr18'}}) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
455
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
456 # print "$gName\t$genes{'chr18'}->{$gName}{'TSS'}\n";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
457 #}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
458
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
459 #-----------read file with miRNA sites, store them as genes-----
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
460
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
# Load miRNA annotations from $MirFilename (if one was given) and store each
# miRNA in %genes as a single-exon "gene" so that the peak-assignment code
# that follows can treat genes and miRNAs uniformly.
#
# Reads : $MirFilename, $verbose, $INFINITY
# Writes: %genes (one record per miRNA under $genes{$chr}->{$ID}),
#         $count (reset to 0, then incremented once per stored miRNA)
# Dies  : when $MirFilename cannot be opened.
if ( $MirFilename eq "") {
    print "you did not specify file with miRNA\n" if ($verbose);
}
else {
    # Annotation slots that exist as a "<prefix>Score" / "<prefix>Dist" pair.
    # Every miRNA starts with score 0 and distance $INFINITY; these are the
    # same initial values the gene records receive when the genes file is read.
    my @peakSlots = qw(
        RNApol                  RNApol_junction
        K9prom                  K9large                 K9enh
        TFprom                  TFenh                   TFintra
        TFall                   TFFirstIntronAndIntra   TFFirstIntron
        TFintraMinusFirstIntron TFImmDown               TFpromSimple
        TF_FirstExon            TF_OtherExons           TF_OtherExonsAndIntra
        TF_junction             TF_OtherIntrons         TF_OtherIntronsAndIntra
        TF_junctionAndIntra     TF_FirstExonAndIntra    TFenh60kb
        TF5kbDown
    );

    $count = 0;

    # Three-argument open with a lexical handle: the file name is used
    # literally instead of being parsed for mode or pipe characters.
    open (my $mirFh, '<', $MirFilename) or die "Cannot open file $MirFilename !!!!: $!";

    while (my $line = <$mirFh>) {
        chomp $line;
        # Files with CRLF line ends keep a "\r" after chomp. The BED pattern
        # below would match it as a field separator, which shifts the captures:
        # the name absorbs the score column and the strand comes out empty
        # (and is then stored as the minus strand).
        $line =~ s/\r\z//;

        my ($name, $chr, $leftPos, $rightPos, $strand);

        # GFF-like line, e.g.
        #1 . miRNA 20669091 20669163 . + . ACC="MI0000249"; ID="mmu-mir-206";
        if (my @gff = $line =~ /([0-9XYM]+)\s.\smiRNA\s(\d+)\s(\d+)\s.\s([+-])\s.\sACC=.*ID=\"(.*)\"/) {
            ($chr, $leftPos, $rightPos, $strand, $name) = @gff;
        }
        # BED-like line, e.g.
        #chr1 20669090 20669163 mmu-mir-206 960 +
        # (the fifth column is captured but not used)
        elsif (my @bed = $line =~ /(.*)\s(\d+)\s(\d+)\s(.*)\s(.*)\s(.*)/) {
            ($chr, $leftPos, $rightPos, $name, $strand) = @bed[0, 1, 2, 3, 5];
        }
        else {
            next;    # neither format: ignore the line
        }

        # Chromosome names are stored with a "chr" prefix.
        $chr = "chr".$chr unless ($chr =~ m/chr/);

        # $count is part of the ID, so two miRNAs with the same name and
        # coordinates still get distinct records.
        my $ID = "$name\t$chr:$leftPos-$rightPos\t$count";

        # Anything that is not '+' is treated as the minus strand.
        $strand = ($strand eq '+') ? 1 : -1;

        $genes{$chr} = {} unless (exists($genes{$chr}));
        unless (exists($genes{$chr}->{$ID})) {
            $genes{$chr}->{$ID} = {};
            $count++;
        }
        my $mir = $genes{$chr}->{$ID};

        # Coordinates: a miRNA is modelled as one exon spanning the whole
        # feature, with the "coding" region equal to the feature itself.
        $mir->{'name'}       = $name;
        $mir->{'left'}       = $leftPos;
        $mir->{'right'}      = $rightPos;
        $mir->{'cdsStart'}   = $leftPos;
        $mir->{'cdsEnd'}     = $rightPos;
        $mir->{'strand'}     = $strand;
        $mir->{'length'}     = abs ($leftPos-$rightPos);
        $mir->{'exonCount'}  = 1;
        $mir->{'exonStarts'} = $leftPos;
        $mir->{'exonEnds'}   = $rightPos;

        # Transcription start / end depend on the strand.
        $mir->{'TSS'} = ($strand == 1)  ? $leftPos : $rightPos;
        $mir->{'TE'}  = ($strand == -1) ? $leftPos : $rightPos;

        # Fixed placeholder values for the expression-related fields.
        $mir->{'reg'}        = "miRNA";
        $mir->{'foldChange'} = 1;
        $mir->{'fluo'}       = "N/A";

        # Peak annotation slots: nothing assigned yet.
        $mir->{'K36score'} = 0;    # this slot has a score only, no distance
        for my $slot (@peakSlots) {
            $mir->{$slot.'Score'} = 0;
            $mir->{$slot.'Dist'}  = $INFINITY;
        }

        # No intron and no separate first exon are recorded for a miRNA.
        ($mir->{'firstIntronStart'}, $mir->{'firstIntronEnd'}) = (0,0);
        ($mir->{'firstExonStart'},   $mir->{'firstExonEnd'})   = (0,0);

        $mir->{'GCisland'} = 0;
    }

    close $mirFh;
    print "\t\t$MirFilename is read!\n" if ($verbose);
    print "$count miRNA\n" if ($verbose);
}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
600
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
601 #-----------read file with sites of TF1-----
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
602 my $numberOfAllSites = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
603
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
604 if ($TF1Filename eq "") {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
605 print "No file with peaks of TF1!\n" if ($verbose) ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
606 } else {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
607 open (FILE, "<$TF1Filename ") or die "Cannot open file $TF1Filename !!!!: $!";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
608 $_ = <FILE>;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
609 my $correction = 0;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
610 my @a = split /\t/;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
611 if ( $a[1] =~ m/chr/ ) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
612 $correction = 1;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
613 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
614
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
615 while (<FILE>) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
616 chomp;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
617
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
618 my @a = split /\t/;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
619
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
620 my $chr = $a[0+$correction];
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
621 my $firstPos = $a[1+$correction];
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
622 my $LastPos = $a[2+$correction];
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
623 my $maxPos = $a[3+$correction];
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
624 if ($maxPos=~/\D/) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
625 $maxPos = int(($firstPos+$LastPos)/2);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
626 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
627 my $score = $a[4+$correction];
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
628
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
629 for my $ID (keys %{$genes{$chr}}) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
630
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
631 my $distTSS = ($maxPos - $genes{$chr}->{$ID}{'TSS'})*$genes{$chr}->{$ID}{'strand'};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
632 my $distTE = ($maxPos - $genes{$chr}->{$ID}{'TE'})*$genes{$chr}->{$ID}{'strand'};
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
633
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
634 if (($distTSS>= $enhLeft)&&($distTSS<$enhRight)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
635 if ($genes{$chr}->{$ID}{'TFenhScore'}<$score) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
636 $genes{$chr}->{$ID}{'TFenhScore'}=$score;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
637 $genes{$chr}->{$ID}{'TFenhDist'} = $distTSS;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
638 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
639 } elsif (($distTSS>= $enhRight)&&($distTSS<=$immediateDownstream)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
640 if ($genes{$chr}->{$ID}{'TFpromScore'}<$score) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
641 $genes{$chr}->{$ID}{'TFpromScore'}=$score;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
642 $genes{$chr}->{$ID}{'TFpromDist'} = $distTSS;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
643 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
644 } elsif (($distTSS >= $immediateDownstream)&&($distTE<=0)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
645 if ($genes{$chr}->{$ID}{'TFintraScore'}<$score) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
646 $genes{$chr}->{$ID}{'TFintraScore'}=$score;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
647 $genes{$chr}->{$ID}{'TFintraDist'} = $distTSS;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
648 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
649 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
650 if (($distTSS>= $enhLeft)&&($distTE<=$kb5)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
651 if ($genes{$chr}->{$ID}{'TFallScore'}<$score) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
652 $genes{$chr}->{$ID}{'TFallScore'}=$score;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
653 $genes{$chr}->{$ID}{'TFallDist'} = $distTSS;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
654 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
655 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
656
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
657 if (($distTSS>= 0)&&($distTSS<=$immediateDownstream)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
658 if ($genes{$chr}->{$ID}{'TFImmDownScore'}<$score) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
659 $genes{$chr}->{$ID}{'TFImmDownScore'}=$score;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
660 $genes{$chr}->{$ID}{'TFImmDownDist'} = $distTSS;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
661 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
662 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
663 if (($distTSS<= 0)&&($distTSS>=$enhRight)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
664 if ($genes{$chr}->{$ID}{'TFpromSimpleScore'}<$score) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
665 $genes{$chr}->{$ID}{'TFpromSimpleScore'}=$score;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
666 $genes{$chr}->{$ID}{'TFpromSimpleDist'} = $distTSS;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
667 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
668 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
669
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
670 my ($firstIntronStart,$firstIntronEnd)=($genes{$chr}->{$ID}{'firstIntronStart'},$genes{$chr}->{$ID}{'firstIntronEnd'});
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
671 ($firstIntronStart,$firstIntronEnd)= ($firstIntronEnd,$firstIntronStart) if ($firstIntronStart>$firstIntronEnd) ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
672
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
673 my ($firstExonStart,$firstExonEnd) = ($genes{$chr}->{$ID}{'firstExonStart'},$genes{$chr}->{$ID}{'firstExonEnd'}) ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
674 ($firstExonStart,$firstExonEnd)= ($firstExonEnd,$firstExonStart) if ($firstExonStart>$firstExonEnd) ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
675
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
676 if ($maxPos>=$firstIntronStart && $maxPos <= $firstIntronEnd) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
677 if ($genes{$chr}->{$ID}{'TFFirstIntronScore'}<$score) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
678 $genes{$chr}->{$ID}{'TFFirstIntronScore'}=$score;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
679 $genes{$chr}->{$ID}{'TFFirstIntronDist'} = $distTSS;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
680 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
681
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
682 if (($distTSS >= $immediateDownstream)&&($distTE<=0)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
683 if ($genes{$chr}->{$ID}{'TFFirstIntronAndIntraScore'}<$score) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
684 $genes{$chr}->{$ID}{'TFFirstIntronAndIntraScore'}=$score;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
685 $genes{$chr}->{$ID}{'TFFirstIntronAndIntraDist'} = $distTSS;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
686 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
687 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
688 } elsif (($distTSS >= $immediateDownstream)&&($distTE<=0)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
689 if ($genes{$chr}->{$ID}{'TFintraMinusFirstIntronScore'}<$score) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
690 $genes{$chr}->{$ID}{'TFintraMinusFirstIntronScore'}=$score;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
691 $genes{$chr}->{$ID}{'TFintraMinusFirstIntronDist'} = $distTSS;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
692 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
693 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
694 if (($distTSS>= $longEnhLeft)&&($distTSS<$enhRight)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
695 if ($genes{$chr}->{$ID}{'TFenh60kbScore'}<$score) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
696 $genes{$chr}->{$ID}{'TFenh60kbScore'}=$score;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
697 $genes{$chr}->{$ID}{'TFenh60kbDist'} = $distTSS;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
698 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
699 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
700 if (($distTE>=0)&&($distTE<=$kb5)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
701 if ($genes{$chr}->{$ID}{'TF5kbDownScore'}<$score) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
702 $genes{$chr}->{$ID}{'TF5kbDownScore'}=$score;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
703 $genes{$chr}->{$ID}{'TF5kbDownDist'} = $distTSS;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
704 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
705 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
706 if ($distTSS>=0 && $distTE<=0) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
707 my $typeIntra = &getTypeIntra($genes{$chr}->{$ID},$maxPos);
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
708 if ($typeIntra eq "f_exon") {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
709 if ($genes{$chr}->{$ID}{'TF_FirstExonScore'}<$score) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
710 $genes{$chr}->{$ID}{'TF_FirstExonScore'}=$score;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
711 $genes{$chr}->{$ID}{'TF_FirstExonDist'} = $distTSS;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
712 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
713
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
714 if ($genes{$chr}->{$ID}{'TF_FirstExonAndIntraScore'}<$score && ($distTSS >= $immediateDownstream)&&($distTE<=0)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
715 $genes{$chr}->{$ID}{'TF_FirstExonAndIntraScore'}=$score;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
716 $genes{$chr}->{$ID}{'TF_FirstExonAndIntraDist'} = $distTSS;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
717 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
718
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
719 } else {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
720 if ($typeIntra eq "exon") {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
721 if ($genes{$chr}->{$ID}{'TF_OtherExonsScore'}<$score) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
722 $genes{$chr}->{$ID}{'TF_OtherExonsScore'}=$score;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
723 $genes{$chr}->{$ID}{'TF_OtherExonsDist'} = $distTSS;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
724 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
725
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
726 if (($distTSS >= $immediateDownstream)&&($distTE<=0)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
727 if ($genes{$chr}->{$ID}{'TF_OtherExonsAndIntraScore'}<$score) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
728 $genes{$chr}->{$ID}{'TF_OtherExonsAndIntraScore'}=$score;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
729 $genes{$chr}->{$ID}{'TF_OtherExonsAndIntraDist'} = $distTSS;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
730 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
731 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
732
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
733 } elsif ($typeIntra eq "intron") {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
734 if ($genes{$chr}->{$ID}{'TF_OtherIntronsScore'}<$score) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
735 $genes{$chr}->{$ID}{'TF_OtherIntronsScore'}=$score;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
736 $genes{$chr}->{$ID}{'TF_OtherIntronsDist'} = $distTSS;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
737 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
738
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
739 if (($distTSS >= $immediateDownstream)&&($distTE<=0)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
740 if ($genes{$chr}->{$ID}{'TF_OtherIntronsAndIntraScore'}<$score) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
741 $genes{$chr}->{$ID}{'TF_OtherIntronsAndIntraScore'}=$score;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
742 $genes{$chr}->{$ID}{'TF_OtherIntronsAndIntraDist'} = $distTSS;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
743 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
744 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
745 } elsif ($typeIntra eq "jonction") {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
746 if ($genes{$chr}->{$ID}{'TF_junctionScore'}<$score) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
747 $genes{$chr}->{$ID}{'TF_junctionScore'}=$score;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
748 $genes{$chr}->{$ID}{'TF_junctionDist'} = $distTSS;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
749 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
750
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
751 if ($genes{$chr}->{$ID}{'TF_junctionAndIntraScore'}<$score && ($distTSS >= $immediateDownstream)&&($distTE<=0)) {
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
752 $genes{$chr}->{$ID}{'TF_junctionAndIntraScore'}=$score;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
753 $genes{$chr}->{$ID}{'TF_junctionAndIntraDist'} = $distTSS;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
754 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
755
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
756 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
757
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
758 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
759 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
760
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
761
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
762 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
763 $numberOfAllSites++;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
764 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
765
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
766 close FILE;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
767 print "\t$TF1Filename is read!\n" if ($verbose) ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
768 print "$numberOfAllSites sites\n" ;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
769 }
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
770
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
#-----------read file with sites RNApolII-----
# Scan the RNA pol II peak file and, for every gene on the peak's chromosome,
# remember the highest-scoring peak that
#   * lies between $enhRight and $immediateDownstream relative to the TSS
#     ('RNApolScore' / 'RNApolDist'), and
#   * lies inside the gene on a position that getTypeIntra() classifies as
#     "jonction" ('RNApol_junctionScore' / 'RNApol_junctionDist').
# Peaks scoring below $cutoff_rp are ignored and are not counted.
$numberOfAllSites = 0;

if ($RNApolFilename eq "") {
    print "No file with peaks of RNA pol II!\n" if ($verbose);
}
else {
    # Three-argument open with a lexical handle: the file name is never
    # interpreted as an open mode or a pipe.
    open(my $rnaPolFh, '<', $RNApolFilename)
        or die "Cannot open file $RNApolFilename !!!!: $!";

    # The first line selects the column layout: when its first field contains
    # "chr", the chromosome is in column 0 and every index shifts by -1.
    # NOTE(review): this first line is always consumed, even if it is a data
    # row (the rewind via seek was already disabled in the original) --
    # confirm that input files always start with a header line.
    my $firstLine   = <$rnaPolFh>;
    my @firstFields = defined $firstLine ? split(/\t/, $firstLine) : ();
    my $correction  = (@firstFields && $firstFields[0] =~ m/chr/) ? -1 : 0;

    while (my $line = <$rnaPolFh>) {
        chomp $line;
        my @fields = split /\t/, $line;

        my $chr    = $fields[1 + $correction];
        my $maxPos = $fields[4 + $correction];
        my $score  = $fields[5 + $correction];

        next if ($score < $cutoff_rp);

        # "|| {}" avoids autovivifying an empty entry in %genes for
        # chromosomes that carry no annotated gene.
        my $genesOnChr = $genes{$chr} || {};

        for my $ID (keys %$genesOnChr) {
            my $gene = $genesOnChr->{$ID};

            # Signed distances: positive means downstream on the gene's strand.
            my $distTSS = ($maxPos - $gene->{'TSS'}) * $gene->{'strand'};
            my $distTE  = ($maxPos - $gene->{'TE'})  * $gene->{'strand'};

            if ($distTSS >= $enhRight && $distTSS <= $immediateDownstream) {
                if ($gene->{'RNApolScore'} < $score) {
                    $gene->{'RNApolScore'} = $score;
                    $gene->{'RNApolDist'}  = $distTSS;
                }
            }

            if ($distTSS >= 0 && $distTE <= 0) {
                my $typeIntra = getTypeIntra($gene, $maxPos);
                if ($typeIntra eq "jonction") {
                    if ($gene->{'RNApol_junctionScore'} < $score) {
                        $gene->{'RNApol_junctionScore'} = $score;
                        $gene->{'RNApol_junctionDist'}  = $distTSS;
                    }
                }
            }
        }

        $numberOfAllSites++;
    }
    close $rnaPolFh;
    print "\t$RNApolFilename is read!\n$numberOfAllSites sites\n" if ($verbose);
}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
827
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
#-----------read file with sites K36me3-----
# Accumulate, for every gene, an H3K36me3 signal in 'K36score': each peak
# contributes half of its score weighted by the fraction of the gene
# ('left'..'right') that the peak overlaps.
$numberOfAllSites = 0;
my @K36Score;

if ($H3K36Me3polFilename eq "") {
    print "No file with peaks of H3K36me3!\n" if ($verbose);
}
else {
    # Three-argument open with a lexical handle: the file name is never
    # interpreted as an open mode or a pipe.
    open(my $k36Fh, '<', $H3K36Me3polFilename)
        or die "Cannot open file $H3K36Me3polFilename !!!!: $!";

    # The first line selects the column layout: when its first field contains
    # "chr", the chromosome is in column 0 and every index shifts by -1.
    # NOTE(review): this first line is always consumed, even if it is a data
    # row -- confirm that input files always start with a header line.
    my $firstLine   = <$k36Fh>;
    my @firstFields = defined $firstLine ? split(/\t/, $firstLine) : ();
    my $correction  = (@firstFields && $firstFields[0] =~ m/chr/) ? -1 : 0;

    while (my $line = <$k36Fh>) {
        chomp $line;
        my @fields = split /\t/, $line;

        my $chr      = $fields[1 + $correction];
        my $firstPos = $fields[2 + $correction];
        my $lastPos  = $fields[3 + $correction];
        my $score    = $fields[5 + $correction];

        # "|| {}" avoids autovivifying an empty entry in %genes for
        # chromosomes that carry no annotated gene.
        my $genesOnChr = $genes{$chr} || {};

        for my $ID (keys %$genesOnChr) {
            my $gene       = $genesOnChr->{$ID};
            my $left       = $gene->{'left'};
            my $right      = $gene->{'right'};
            my $geneLength = $right - $left + 1;
            my $halfScore  = $score / 2.;

            my $scoreToadd = 0;

            if ($firstPos >= $left && $lastPos <= $right) {
                # Peak entirely inside the gene.
                $scoreToadd = $halfScore * ($lastPos - $firstPos + 1) / $geneLength;
            }
            elsif ($firstPos >= $left && $firstPos < $right && $lastPos > $right) {
                # Peak starts inside the gene and runs past its right end.
                $scoreToadd = $halfScore * ($right - $firstPos + 1) / $geneLength;
            }
            elsif ($firstPos < $left && $lastPos > $left && $lastPos <= $right) {
                # Peak starts before the gene and ends inside it.
                $scoreToadd = $halfScore * ($lastPos - $left + 1) / $geneLength;
            }
            elsif ($firstPos < $left && $lastPos > $right) {
                # Peak covers the whole gene.  The original code declared a new
                # "my $scoreToadd" here, so the value was discarded and such
                # peaks contributed nothing.  The overlap is the full gene
                # length, i.e. a fraction of 1, consistent with the branches
                # above.
                # NOTE(review): the discarded expression lacked the division
                # by gene length -- confirm a fraction of 1 is what is wanted.
                $scoreToadd = $halfScore;
            }

            $gene->{'K36score'} += $scoreToadd;
        }
        $numberOfAllSites++;
    }
    close $k36Fh;
    print "\t$H3K36Me3polFilename is read!\n$numberOfAllSites sites\n" if ($verbose);
}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
882
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
#-----------read file with sites H3K9me3-----
# Scan the H3K9me3 peak file and, for every gene on the peak's chromosome,
# remember the highest-scoring peak in three windows around the TSS:
#   * [$enhRight, $immediateDownstream]          -> 'K9promScore'  / 'K9promDist'
#   * otherwise, within +/- $K9dist of the TSS   -> 'K9largeScore' / 'K9largeDist'
#   * [$enhLeft, $enhRight)                      -> 'K9enhScore'   / 'K9enhDist'
# Peaks scoring below $cutoff_k9 are ignored and are not counted.
$numberOfAllSites = 0;

if ($H3K9Me3polFilename eq "") {
    print "No file with peaks of H3K9me3!\n" if ($verbose);
}
else {
    # Three-argument open with a lexical handle: the file name is never
    # interpreted as an open mode or a pipe.
    open(my $k9Fh, '<', $H3K9Me3polFilename)
        or die "Cannot open file $H3K9Me3polFilename !!!!: $!";

    # The first line selects the column layout: when its first field contains
    # "chr", the chromosome is in column 0 and every index shifts by -1.
    # NOTE(review): this first line is always consumed, even if it is a data
    # row -- confirm that input files always start with a header line.
    my $firstLine   = <$k9Fh>;
    my @firstFields = defined $firstLine ? split(/\t/, $firstLine) : ();
    my $correction  = (@firstFields && $firstFields[0] =~ m/chr/) ? -1 : 0;

    while (my $line = <$k9Fh>) {
        chomp $line;
        my @fields = split /\t/, $line;

        my $chr    = $fields[1 + $correction];
        my $maxPos = $fields[4 + $correction];
        my $score  = $fields[5 + $correction];

        next if ($score < $cutoff_k9);

        # "|| {}" avoids autovivifying an empty entry in %genes for
        # chromosomes that carry no annotated gene.
        my $genesOnChr = $genes{$chr} || {};

        for my $ID (keys %$genesOnChr) {
            my $gene = $genesOnChr->{$ID};

            # Signed distance: positive means downstream on the gene's strand.
            my $distTSS = ($maxPos - $gene->{'TSS'}) * $gene->{'strand'};

            if ($distTSS >= $enhRight && $distTSS <= $immediateDownstream) {
                if ($gene->{'K9promScore'} < $score) {
                    $gene->{'K9promScore'} = $score;
                    $gene->{'K9promDist'}  = $distTSS;
                }
            }
            elsif ($distTSS >= -$K9dist && $distTSS <= $K9dist) {
                if ($gene->{'K9largeScore'} < $score) {
                    $gene->{'K9largeScore'} = $score;
                    $gene->{'K9largeDist'}  = $distTSS;
                }
            }

            if ($distTSS >= $enhLeft && $distTSS < $enhRight) {
                if ($gene->{'K9enhScore'} < $score) {
                    $gene->{'K9enhScore'} = $score;
                    $gene->{'K9enhDist'}  = $distTSS;
                }
            }
        }
        $numberOfAllSites++;
    }
    close $k9Fh;
    print "\t$H3K9Me3polFilename is read!\n$numberOfAllSites sites\n" if ($verbose);
}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
939
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
#-----------output all-----
#unless($initialTable eq "") {}

# Write one tab-separated row per gene to $outname.  Optional column groups
# are emitted only when the corresponding input file name is non-empty.
#
# Header labels and data keys are kept side by side in the tables below so the
# header can never drift away from the data.  The original header listed
# "FisrtIntron" before "FirstExonAND>..." while the rows were written in the
# order FirstExon, FirstExonAndIntra, FirstIntron, FirstIntronAndIntra, which
# mislabelled two column pairs; the header now follows the (unchanged) data
# order.  The historical "Fisrt" spelling is kept on purpose so consumers that
# look columns up by name keep working.

# [ switch value, [header labels], [keys in the gene record] ]
my @optionalOutputColumns = (
    [ $GCislands,           ['GC-island'],      ['GCisland'] ],
    [ $fluoFile,            ['fluorescence'],   ['fluo'] ],
    [ $RNApolFilename,
      [qw(RNApolII_score RNApolII_distTSS RNApol_junctionScore RNApol_junctionDist)],
      [qw(RNApolScore RNApolDist RNApol_junctionScore RNApol_junctionDist)] ],
    [ $H3K36Me3polFilename, ['H3K36me3_score'], ['K36score'] ],
    [ $H3K9Me3polFilename,
      [qw(H3K9me3_score_prom H3K9me3_distTSS_prom H3K9me3_score_large
          H3K9me3_distTSS_large H3K9me3_score_enh H3K9me3_distTSS_enh)],
      [qw(K9promScore K9promDist K9largeScore K9largeDist K9enhScore K9enhDist)] ],
);

# [ header suffix, key prefix ]: header is TF_score_<suffix>/TF_distTSS_<suffix>,
# data comes from <prefix>Score/<prefix>Dist.  The 'TFintraMinusFirstIntron'
# and 'TFenh60kb' values are deliberately not exported (they were commented
# out in the original output code as well).
my @tfOutputColumns = (
    [ 'Gene',                                     'TFall' ],
    [ 'Promoter',                                 'TFpromSimple' ],
    [ 'ImmDown',                                  'TFImmDown' ],
    [ 'PromoterORImmDown',                        'TFprom' ],
    [ 'Enhancer',                                 'TFenh' ],
    [ 'Intragenic',                               'TFintra' ],
    [ 'GeneDownstream',                           'TF5kbDown' ],
    [ 'FirstExon',                                'TF_FirstExon' ],
    [ "FirstExonAND>$immediateDownstream",        'TF_FirstExonAndIntra' ],
    [ 'FisrtIntron',                              'TFFirstIntron' ],
    [ "FisrtIntronAND>$immediateDownstream",      'TFFirstIntronAndIntra' ],
    [ 'Exons2,3,4,etc',                           'TF_OtherExons' ],
    [ "Exons2,3,4,etcAND>$immediateDownstream",   'TF_OtherExonsAndIntra' ],
    [ 'Introns2,3,4,etc',                         'TF_OtherIntrons' ],
    [ "Introns2,3,4,etcAND>$immediateDownstream", 'TF_OtherIntronsAndIntra' ],
    [ 'EIjunction',                               'TF_junction' ],
    [ "EIjunctionAND>$immediateDownstream",       'TF_junctionAndIntra' ],
);

my @activeOutputColumns = grep { $_->[0] ne "" } @optionalOutputColumns;
my $withTFColumns       = ($TF1Filename ne "");

open(my $outFh, '>', $outname) or die "Cannot open file $outname!!!!: $!";

# ---- header line ----
# Every non-TF column is followed by a tab; the TF columns come last and the
# final one is followed directly by the newline.
my @headerLabels = qw(name chr start end strand Reg foldChange);
for my $group (@activeOutputColumns) {
    push @headerLabels, @{ $group->[1] };
}
print {$outFh} join("\t", @headerLabels), "\t";

if ($withTFColumns) {
    my @tfLabels;
    for my $column (@tfOutputColumns) {
        my $suffix = $column->[0];
        push @tfLabels, "TF_score_$suffix", "TF_distTSS_$suffix";
    }
    print {$outFh} join("\t", @tfLabels);
}
print {$outFh} "\n";

# ---- one row per gene ----
for my $chr (keys %genes) {
    for my $ID (keys %{ $genes{$chr} }) {
        my $gene = $genes{$chr}->{$ID};

        my @rowValues = (
            $gene->{'name'},  $chr,
            $gene->{'left'},  $gene->{'right'},
            $gene->{'strand'}, $gene->{'reg'}, $gene->{'foldChange'},
        );
        for my $group (@activeOutputColumns) {
            push @rowValues, map { $gene->{$_} } @{ $group->[2] };
        }
        print {$outFh} join("\t", @rowValues), "\t";

        if ($withTFColumns) {
            my @tfValues;
            for my $column (@tfOutputColumns) {
                my $prefix = $column->[1];
                push @tfValues, $gene->{"${prefix}Score"}, $gene->{"${prefix}Dist"};
            }
            print {$outFh} join("\t", @tfValues);
        }
        print {$outFh} "\n";
    }
}

# Buffered write errors (e.g. disk full) only surface at close time.
close($outFh) or die "Cannot close file $outname!!!!: $!";
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
1044
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
1045 ###################################
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
# Return the median of a list of numbers.
#
# Arguments: the values, as a flat list in any order.
# Returns:   the middle value when the list has an odd number of elements,
#            the mean of the two middle values when it has an even number,
#            and 0 when the list is empty.
sub med {
    my @arr   = sort { $a <=> $b } @_;
    my $count = scalar @arr;

    # An empty list used to evaluate (undef + undef) / 2, which is 0 plus
    # "uninitialized value" warnings; keep the 0 but return it explicitly.
    return 0 unless $count;

    # int() truncates, so $mid is the upper of the two middle indices for an
    # even count and the single middle index for an odd count.
    my $mid = int($count / 2);

    # Parity is tested arithmetically rather than by pattern-matching the
    # stringified quotient for ".5" / ",5".
    return $arr[$mid] if $count % 2;
    return ($arr[$mid - 1] + $arr[$mid]) / 2;
}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
1057
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
# Report whether any GC island overlaps the promoter window of a TSS.
#
# Arguments:
#   $TSS          - transcription start site coordinate
#   $strand       - -1 selects the window [$TSS, $TSS + $dist]; any other
#                   value selects [$TSS - $dist, $TSS]
#   $dist         - width of the promoter window
#   $GCislandsChr - hash ref mapping each island's left coordinate to its
#                   right coordinate
# Returns: the string "GC-island" if at least one island overlaps the
#          window, otherwise 0.
sub checkIfGC {
    my ($TSS, $strand, $dist, $GCislandsChr) = @_;

    # Choose the window in a single assignment. The earlier code re-declared
    # both bounds with "my" inside the strand == -1 branch, so the new values
    # were discarded at the end of that branch and the window for strand -1
    # was never applied.
    my ($leftProm, $rightProm) = ($strand == -1)
        ? ($TSS, $TSS + $dist)
        : ($TSS - $dist, $TSS);

    for my $leftGC (keys %{$GCislandsChr}) {
        my $rightGC = $GCislandsChr->{$leftGC};

        my $leftInside  = ($leftGC  >= $leftProm && $leftGC  <= $rightProm);
        my $rightInside = ($rightGC >= $leftProm && $rightGC <= $rightProm);

        # An island that spans the whole window has neither end inside it,
        # so it needs its own test.
        my $encloses = ($leftGC <= $leftProm && $rightGC >= $rightProm);

        return "GC-island" if $leftInside || $rightInside || $encloses;
    }

    return 0;
}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
1075
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
# Return the (left, right) coordinates of a transcript's first intron.
#
# Arguments:
#   $exonCount  - number of exons
#   $exonStarts - comma-separated exon start coordinates
#   $exonEnds   - comma-separated exon end coordinates
#   $strand     - 1 takes the intron after the first listed exon; any other
#                 value takes the intron before the last listed exon
# Returns: (0, 0) when $exonCount is 1; otherwise the end of the exon on the
#          left of the intron and the start of the exon on its right.
sub getFirstIntron {
    my ($exonCount, $exonStarts, $exonEnds, $strand) = @_;

    return (0, 0) if $exonCount == 1;

    my @starts = split ",", $exonStarts;
    my @ends   = split ",", $exonEnds;

    # Index of the exon lying immediately to the left of the wanted intron.
    my $before = ($strand == 1) ? 0 : $exonCount - 2;

    return ($ends[$before], $starts[$before + 1]);
}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
1091
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
# Return the (left, right) coordinates of a transcript's first exon, shortened
# by $jonctionSize on one side.
#
# Arguments:
#   $exonCount  - number of exons
#   $exonStarts - comma-separated exon start coordinates
#   $exonEnds   - comma-separated exon end coordinates
#   $strand     - 1 uses the first listed exon; any other value uses the exon
#                 at index $exonCount - 1
# Returns: (0, 0) when $exonCount is 1. For strand 1, the exon start and the
#          exon end minus $jonctionSize. Otherwise the exon start plus
#          $jonctionSize and the exon end.
#
# $jonctionSize is not declared here; it is read from the enclosing file scope.
sub getFirstExon {
    my ($exonCount, $exonStarts, $exonEnds, $strand) = @_;

    return (0, 0) if $exonCount == 1;

    my @starts = split ",", $exonStarts;
    my @ends   = split ",", $exonEnds;

    if ($strand == 1) {
        return ($starts[0], $ends[0] - $jonctionSize);
    }

    my $lastIdx = $exonCount - 1;
    return ($starts[$lastIdx] + $jonctionSize, $ends[$lastIdx]);
}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
1107
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
1108
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
# Classify a position inside a gene as "exon", "intron" or "jonction".
#
# Arguments:
#   $pos        - position to classify
#   $exonCount  - number of exons to examine
#   $exonStarts - comma-separated exon start coordinates
#   $exonEnds   - comma-separated exon end coordinates
#   $strand     - accepted for interface compatibility; not used here
# Returns:
#   "exon"     if $pos lies strictly inside an exon, more than $jonctionSize
#              away from both of its ends;
#   "intron"   if $pos lies strictly between two consecutive exons, more than
#              $jonctionSize away from both;
#   "jonction" in every other case.
#
# $jonctionSize is not declared here; it is read from the enclosing file scope.
sub getIntronExon {
    my ($pos, $exonCount, $exonStarts, $exonEnds, $strand) = @_;

    my @starts = split ",", $exonStarts;
    my @ends   = split ",", $exonEnds;

    my $idx = 0;
    while ($idx < $exonCount) {
        my $next = $idx + 1;

        return "exon"
            if ($starts[$idx] + $jonctionSize < $pos)
            && ($pos < $ends[$idx] - $jonctionSize);

        # Only look at the following intron when there is a following exon.
        return "intron"
            if ($next < $exonCount)
            && ($ends[$idx] + $jonctionSize < $pos)
            && ($pos < $starts[$next] - $jonctionSize);

        $idx = $next;
    }

    return "jonction";
}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
1126
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
1127
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
# Classify a position that falls inside a gene.
#
# Arguments:
#   $geneEntry - hash ref for the gene; the keys read here are
#                firstIntronStart, firstIntronEnd, firstExonStart,
#                firstExonEnd, exonCount, exonStarts, exonEnds and strand
#   $pos       - position to classify
# Returns: "f_intron" when $pos is within the first-intron bounds
#          (inclusive), "f_exon" when it is within the first-exon bounds
#          (inclusive), otherwise whatever getIntronExon() returns for it.
#          The first intron is tested before the first exon.
sub getTypeIntra {
    my ($geneEntry, $pos) = @_;

    my $inFirstIntron = ($pos >= $geneEntry->{'firstIntronStart'})
                     && ($pos <= $geneEntry->{'firstIntronEnd'});
    return "f_intron" if $inFirstIntron;

    my $inFirstExon = ($pos >= $geneEntry->{'firstExonStart'})
                   && ($pos <= $geneEntry->{'firstExonEnd'});
    return "f_exon" if $inFirstExon;

    return getIntronExon(
        $pos,
        $geneEntry->{'exonCount'},
        $geneEntry->{'exonStarts'},
        $geneEntry->{'exonEnds'},
        $geneEntry->{'strand'},
    );
}
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
1142
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
# Sum the lengths of all exons of a transcript.
#
# Arguments:
#   $exonStarts - comma-separated exon start coordinates
#   $exonEnds   - comma-separated exon end coordinates
# Returns: the sum of (end - start) over every entry of $exonEnds, pairing
#          each end with the start at the same index.
sub getRNAlength {
    my ($exonStarts, $exonEnds) = @_;

    my @starts = split ",", $exonStarts;
    my @ends   = split ",", $exonEnds;

    my $total = 0;
    for my $idx (0 .. $#ends) {
        $total += $ends[$idx] - $starts[$idx];
    }
    #print STDERR "length = $total\n";

    return $total;
}