annotate bin/bowtie2eland.pl @ 5:2ebca9da5e42 draft default tip

planemo upload
author bioitcore
date Thu, 07 Sep 2017 17:39:24 -0400
parents adc0f7765d85
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
1 use strict;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
2
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
3 my $bowtiefilename=$ARGV[0];
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
4 my $readsfilename=$ARGV[1];
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
5 my $elandfilename=$ARGV[2];
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
6
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
7 open(readsfile, $readsfilename);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
8
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
9 my $detectformat=`head -c 1 $readsfilename`;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
10
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
11 #my $firstletter=$detectformat;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
12 #my $looplinenumbers=4;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
13
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
14 #$looplinenumbers=2 if ($detectformat eq ">");
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
15 open(bowtiefile, $bowtiefilename);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
16 open(elandfile, ">".$elandfilename);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
17 my $readfilelinenum=0;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
18 # hash the positions of the alignments for each read id
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
19 my %readposhash;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
20 my $bowtiepos = tell (bowtiefile);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
21 while (my $bowtieline=<bowtiefile>)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
22 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
23 my ($bowtiereadname, $strand, $mapped_id, $pos, $seq, $qt,$num, $mapinfo)=split("\t",$bowtieline);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
24 if (not exists $readposhash{$bowtiereadname} )
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
25 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
26 $readposhash{$bowtiereadname} = $bowtiepos;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
27 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
28 $bowtiepos = tell (bowtiefile);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
29 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
30
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
31 while(my $readline=<readsfile>)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
32 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
33 $readfilelinenum++;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
34 if(($readline=~/^$detectformat/) && ($readfilelinenum%2 ==1))
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
35 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
36 chomp($readline);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
37 my $readname=substr($readline, 1, length($readline)-1);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
38 if( not exists $readposhash{$readname} )
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
39 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
40 print elandfile $readname,"\tNA\tNM\n";
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
41 next;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
42 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
43 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
44 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
45 my @mapped_ids=();
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
46 my @mapped_pos=();
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
47 my @mapped_strand=();
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
48 seek(bowtiefile, $readposhash{$readname}, 0);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
49 while (my $bowtieline=<bowtiefile>)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
50 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
51 my ($bowtiereadname, $strand, $mapped_id, $pos, $seq, $qt,$num, $mapinfo)=split("\t",$bowtieline);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
52 if($readname eq $bowtiereadname)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
53 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
54 push(@mapped_ids, $mapped_id);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
55 push(@mapped_pos,$pos);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
56 push(@mapped_strand,$strand);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
57 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
58 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
59 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
60 last;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
61 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
62
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
63 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
64 print elandfile $readname,"\t";
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
65 print elandfile "NA\t";
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
66 print elandfile scalar(@mapped_ids),":0:0\t";
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
67 for(my $i=0;$i<@mapped_ids;$i++)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
68 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
69 print elandfile "/",$mapped_ids[$i];
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
70 print elandfile ":",$mapped_pos[$i]+1;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
71 if($mapped_strand[$i] eq "+")
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
72 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
73 print elandfile "F0,";
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
74 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
75 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
76 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
77 print elandfile "R0,";
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
78 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
79
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
80 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
81 print elandfile "\n";
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
82
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
83 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
84 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
85 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
86
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
87 close(elandfile);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
88 close(bowtiefile);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
89 close(readsfile);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
90
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
91 exit;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
92 while(my $bowtieline=<bowtiefile>)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
93 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
94 my ($bowtiereadname, $strand, $mapped_id, $pos, $seq, $qt,$num, $mapinfo)=split("\t",$bowtieline);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
95 while(my $readline=<readsfile>)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
96 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
97 $readfilelinenum++;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
98 if(($readline=~/^$detectformat/) && ($readfilelinenum%2 ==1))
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
99 #if($readline=~/^$detectformat/)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
100 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
101 chomp($readline);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
102 my $readname=substr($readline, 1, length($readline)-1);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
103
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
104
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
105 if($readname ne $bowtiereadname)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
106 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
107 print elandfile $readname,"\tNA\tNM\n";
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
108 next;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
109 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
110 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
111 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
112 my @mapped_ids=();
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
113 my @mapped_pos=();
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
114 my @mapped_strand=();
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
115 push(@mapped_ids, $mapped_id);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
116 push(@mapped_pos,$pos);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
117 push(@mapped_strand,$strand);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
118 while(1)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
119 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
120 $bowtieline=<bowtiefile>;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
121 my ($bowtiereadname, $strand, $mapped_id, $pos, $seq, $qt,$num, $mapinfo)=split("\t",$bowtieline);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
122 if( $bowtiereadname eq $readname )
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
123 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
124 push(@mapped_ids, $mapped_id);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
125 push(@mapped_pos,$pos);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
126 push(@mapped_strand,$strand);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
127 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
128 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
129 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
130 seek(bowtiefile, -1*length($bowtieline),1);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
131 print elandfile $readname,"\t";
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
132 print elandfile "NA\t";
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
133 print elandfile scalar(@mapped_ids),":0:0\t";
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
134 for(my $i=0;$i<@mapped_ids;$i++)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
135 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
136 print elandfile "/",$mapped_ids[$i];
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
137 print elandfile ":",$mapped_pos[$i]+1;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
138 if($mapped_strand[$i] eq "+")
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
139 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
140 print elandfile "F0,";
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
141 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
142 else
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
143 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
144 print elandfile "R0,";
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
145 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
146
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
147 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
148 print elandfile "\n";
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
149 last;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
150 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
151 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
152 last;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
153
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
154 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
155 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
156 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
157 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
158
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
159 while(my $readline=<readsfile>)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
160 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
161 $readfilelinenum++;
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
162 if(($readline=~/^$detectformat/) && ($readfilelinenum%2 ==1))
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
163 #if($readline=~/^$detectformat/)
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
164 {
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
165 chomp($readline);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
166 my $readname=substr($readline, 1, length($readline)-1);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
167 print elandfile $readname,"\tNA\tNM\n";
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
168 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
169 }
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
170
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
171 close(elandfile);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
172 close(bowtiefile);
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
173
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
174
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
175 close(readsfile);