annotate SMART/bacteriaRegulatoryRegion_Detection/interElementGff.pl @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
1 #!/usr/bin/perl -w
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
2 ###
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
3 # But : protocol permettant la detection d'RNA non codant potentiel
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
4 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
5 # Entrees : fichier de mapping Smart gff3
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
6 # fichier gff des gènes
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
7 # fichier gff des clusters Cis regulateur potentiel
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
8 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
9 # Sortie : fichier gff des clusters ARN nc
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
10 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
11 ###------------------------------------------------------
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
12 use vars qw($USAGE);
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
13 use strict;
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
14
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
15 =head1 NAME
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
16
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
17 interElementGff.pl - creation of a new GFF file describing the regions between two successive elements
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
18
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
19 =head1 SYNOPSIS
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
20
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
21 % interElementGff.pl -i inputFile.gff3 -o outputFile.gff3 [-f5ff 50] [-ff3f 20] [-n seqName] [-p] [-h]
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
22
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
23 =head1 DESCRIPTION
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
24 This script will determine clusters of ncRNA.
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
25
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
26 -i|--input fileName gff input file name
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
27 -o|--output fileName gff output file name
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
28 -n|--name seqName sequence name
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
29 -p|--print print parameters used
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
30
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
31 -f5ff n number of nt to exclude from 5' seed when gene before is Forward, seed is Forward and next gene is Forward [default 0]
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
32 -ff3f n number... " ...[default 0]
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
33
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
34 -f5fr n number... " ...[default 0]
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
35 -ff3r n number... " ...[default 0]
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
36
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
37 -f3rf n number... " ...[default 0]
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
38 -fr5f n number... " ...[default 0]
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
39
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
40 -f3rr n number... " ...[default 0]
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
41 -fr5r n number... " ...[default 0]
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
42
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
43 -r5ff n number... " ...[default 0]
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
44 -rf3f n number... " ...[default 0]
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
45
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
46 -r5fr n number... " ...[default 0]
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
47 -rf3r n number... " ...[default 0]
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
48
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
49 -r3rf n number... " ...[default 0]
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
50 -rr5f n number... " ...[default 0]
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
51
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
52 -r3rr n number... " ...[default 0]
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
53 -rr5r n number... " ...[default 0]
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
54
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
55 [-h|--help] help mode then die
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
56
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
57
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
58 USAGE_CASE
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
59
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
60 % interElementGff.pl -i inputFile.gff3 -o outputFile.gff3 -f5ff 53 -r3rr 23 -n NC_011744
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
61
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
62 BUG
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
63
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
64 Caution : input file needs to be sorted on positions
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
65
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
66 Caution : for -f/r options add +3 bp to include stop codon if not in input file
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
67
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
68 =head1 AUTHOR - CTN - apr.11
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
69 (from RNA-Vibrio/protocol_NC_V2.pl - Claire KUCHLY)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
70
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
71 =cut
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
72 #----------------------------------------------------------------------------
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
73 # check command line :
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
#----------------------------------------------------------------------------
# Main program.
#
# Reads a position-sorted GFF file of elements (genes) and writes, for every
# region lying between two successive elements of the same sequence, one
# "seedIR" feature on the forward strand and one on the reverse strand.
#
# Each seed can be trimmed at both ends.  The amount depends on the strand of
# the element before the seed, the strand of the seed and the strand of the
# element after it.  Offset names read <before><seed end + seed strand><after>:
#   f5ff = previous gene forward, 5' end of a forward seed, next gene forward
#   fr5r = previous gene forward, 5' end of a reverse seed, next gene reverse
#
# A value n given on the command line is stored as n+1, so that the seed
# starts n+1 bases after the previous element and stops n+1 bases before the
# next one.
# NOTE(review): an offset that is NOT given stays at 0 (not 1), so by default
# a seed includes the last base of the previous element and the first base of
# the next one, and --print reports -1 for it.  Kept as is to avoid changing
# existing results -- confirm whether the intended default is 1.
#
# The -n|--name option described in the POD is not implemented; like every
# other unknown argument it is silently ignored.
#----------------------------------------------------------------------------
my ($IDfile, $OutputFileName, $printParameters) = (undef, undef, 0);

# Trimming offsets, keyed by option name without its leading dash.
my %offset = map { $_ => 0 } qw(
    f5ff ff3f   f5fr ff3r   f3rf fr5f   f3rr fr5r
    r5ff rf3f   r5fr rf3r   r3rf rr5f   r3rr rr5r
);

# Replace this process by pod2text run on the script itself.  The list form
# of exec bypasses the shell, so a script path with spaces is handled.
my $showHelp = sub {
    exec('pod2text', $0) or die "Error: Can't run pod2text on \"$0\", $!";
};

# Nothing useful can be done with fewer than two arguments: show the help.
# (The original test only caught the one-argument case.)
$showHelp->() if @ARGV <= 1;

#--- command line -------------------------------------------------------------
# Options are compared exactly (not by substring), and the value of an option
# is consumed with it, so a file name such as "out-inter.gff3" can no longer
# be mistaken for the -i option.
my $argIndex = 0;

# Return the value following option $option, or die when it is missing.
my $takeValue = sub {
    my ($option) = @_;
    die "Error: option $option needs a value\n" if $argIndex > $#ARGV;
    return $ARGV[$argIndex++];
};

while ($argIndex < @ARGV) {
    my $arg = $ARGV[$argIndex++];

    if ($arg eq '-h' or $arg eq '--help') {
        $showHelp->();
    } elsif ($arg eq '-p' or $arg eq '--print') {
        $printParameters = 1;
    } elsif ($arg eq '-i' or $arg eq '--input') {
        $IDfile = $takeValue->($arg);
    } elsif ($arg eq '-o' or $arg eq '--output') {
        $OutputFileName = $takeValue->($arg);
    } elsif ($arg =~ /\A-(\w+)\z/ and exists $offset{$1}) {
        my $name  = $1;
        my $value = $takeValue->($arg);
        die "Error: option $arg needs an integer value, got \"$value\"\n"
            unless $value =~ /\A[-+]?\d+\z/;
        $offset{$name} = $value + 1;    # need +1 for interval computations
    }
    # Anything else is ignored, as the original script did.
}

die "Error: no input file given (-i)\n"  unless defined $IDfile;
die "Error: no output file given (-o)\n" unless defined $OutputFileName;

# Files are opened only once the whole command line is known to be valid, so
# that asking for help never truncates an existing output file.
open(my $in, '<', $IDfile)
    or die "Error: Can't open \"$IDfile\", $!";
open(my $out, '>', $OutputFileName)
    or die "Error : Can't open result file \"$OutputFileName\", $!";

if ($printParameters) {
    # Offsets are stored as n+1, hence the -1 to show the user's value.
    print "
--> f5ff ",$offset{f5ff}-1," --> ff3f ",$offset{ff3f}-1," --> ;
--> f5fr ",$offset{f5fr}-1," --> ff3r ",$offset{ff3r}-1," <-- ;
--> f3rf ",$offset{f3rf}-1," <-- fr5f ",$offset{fr5f}-1," --> ;
--> f3rr ",$offset{f3rr}-1," <-- fr5r ",$offset{fr5r}-1," <-- ;
<-- r5ff ",$offset{r5ff}-1," --> rf3f ",$offset{rf3f}-1," --> ;
<-- r5fr ",$offset{r5fr}-1," --> rf3r ",$offset{rf3r}-1," <-- ;
<-- r3rf ",$offset{r3rf}-1," <-- rr5f ",$offset{rr5f}-1," --> ;
<-- r3rr ",$offset{r3rr}-1," <-- rr5r ",$offset{rr5r}-1," <-- ;\n";
}

#--- seed computation ---------------------------------------------------------
# Example of an input line (tab separated):
# NC_011753.2 RefSeq gene 367 834 . - . locus_tag=VS_0001;db_xref=GeneID:7162789

# Strand pair "previous,next" -> letters used to build the offset names.
# Every other combination (e.g. a "." strand) falls back to reverse/reverse,
# as in the original if/elsif chain.
my %contextFor = (
    "+,+" => ["f", "f"],
    "+,-" => ["f", "r"],
    "-,+" => ["r", "f"],
);

# State carried from one element to the next, within one sequence.
my $currentChrom;                       # sequence name of the previous line
my ($debSeedSens, $debSeedAntisens) = (0, 0);   # end of the previous element
my $info_gene     = "";                 # attributes of the previous element
my $sensGeneAvant = "+";                # first seed: there is no previous gene

while (my $ligne = <$in>) {
    chomp($ligne);

    # GFF headers, comments and blank lines are not features.
    next if $ligne =~ /\A\s*(?:#.*)?\z/;

    my @list = split(/\t/, $ligne);
    next if @list < 7;                  # no strand column: cannot be used
    my ($chrom, $start, $end, $strand, $attributes) = @list[0, 3, 4, 6, -1];

    # New sequence: forget the previous element.  (The original re-declared
    # these variables with "my" in an inner scope, which reset nothing.)
    if (!defined($currentChrom) or $currentChrom ne $chrom) {
        $currentChrom = $chrom;
        ($debSeedSens, $debSeedAntisens) = (0, 0);
        $info_gene     = "";
        $sensGeneAvant = "+";
    }

    my $context = $contextFor{"$sensGeneAvant,$strand"} || ["r", "r"];
    my ($avant, $apres) = @$context;

    $debSeedSens        += $offset{"${avant}5f${apres}"};
    my $finSeedSens      = $start - $offset{"${avant}f3${apres}"};
    $debSeedAntisens    += $offset{"${avant}3r${apres}"};
    my $finSeedAntisens  = $start - $offset{"${avant}r5${apres}"};

    # Coordinates are 1-based: a seed cannot start before position 1.
    $debSeedSens     = 1 if $debSeedSens <= 0;
    $debSeedAntisens = 1 if $debSeedAntisens <= 0;

    # Only "real" (non empty) seeds are written.
    if ($debSeedSens < $finSeedSens) {
        print {$out} "$chrom\tperso\tseedIR\t$debSeedSens\t$finSeedSens\t.\t+\t.\tgeneavant=$info_gene;geneapres=$attributes\n";
    }
    if ($debSeedAntisens < $finSeedAntisens) {
        print {$out} "$chrom\tperso\tseedIR\t$debSeedAntisens\t$finSeedAntisens\t.\t-\t.\tgeneavant=$info_gene;geneapres=$attributes\n";
    }

    # The current element becomes the "previous" one of the next seed.
    $sensGeneAvant   = $strand;         # GFF : column 6 gives strand
    $debSeedSens     = $end;
    $debSeedAntisens = $end;
    $info_gene       = $attributes;
}

close($in);
# Buffered write errors (e.g. disk full) only show up when closing.
close($out)
    or die "Error : Can't close result file \"$OutputFileName\", $!";
exit(0);