annotate bin/rmap2eland.pl @ 4:cd336e593a92 draft

planemo upload
author bioitcore
date Thu, 07 Sep 2017 16:53:12 -0400
parents adc0f7765d85
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
adc0f7765d85 planemo upload
bioitcore
parents:
diff changeset
use strict;
use warnings;

# rmap2eland.pl -- merge a tab-separated mapping file with a reads file and
# write one report line per read.
#
# Usage: rmap2eland.pl <rmap_file> <reads_file> <eland_output>
#
# Inputs
#   rmap_file   Tab-separated records whose columns are read here as
#               (mapped_id, start, end, read_name, mismatch, strand).
#               Only mapped_id, start, read_name and strand are used.
#   reads_file  Every odd-numbered line is a header candidate; a candidate is
#               accepted when it starts with the first byte of the file (the
#               "header marker"). The read name is the header line without
#               that first byte.
#               NOTE(review): this presumably targets single-line FASTA or
#               4-line FASTQ input -- confirm with the tool wrapper.
#
# Output (one line per accepted read, in sorted-header order)
#   unmapped:  <read>\tNA\tNM
#   mapped:    <read>\tNA\t<N>:0:0\t/<id>:<start+1>F0,/<id>:<start+1>R0,...
#              ("F0," when strand is "+", otherwise "R0,")
#
# Side effects
#   Creates "<reads_file>.sort" and "<rmap_file>.sort" next to the inputs and
#   leaves them in place, as earlier revisions of this script did.
#
# The two sorted files are merged using string equality only, so an rmap
# record whose read name never appears in the reads file stops the merge at
# that point: every later read is reported as unmapped.

@ARGV >= 3
    or die "Usage: $0 <rmap_file> <reads_file> <eland_output>\n";
my ($rmapfilename, $readsfilename, $elandfilename) = @ARGV;

my $sorted_reads = "$readsfilename.sort";
my $sorted_rmap  = "$rmapfilename.sort";

# The first byte of the reads file is the marker that identifies header lines.
my $detectformat = first_byte($readsfilename);

sort_read_headers($readsfilename, $sorted_reads);
sort_rmap_by_read($rmapfilename, $sorted_rmap);

open(my $reads_fh, '<', $sorted_reads)
    or die "Cannot open '$sorted_reads' for reading: $!\n";
open(my $rmap_fh, '<', $sorted_rmap)
    or die "Cannot open '$sorted_rmap' for reading: $!\n";
open(my $eland_fh, '>', $elandfilename)
    or die "Cannot open '$elandfilename' for writing: $!\n";

# Quote the marker so a regex metacharacter is matched literally.
my $header_re = qr/^\Q$detectformat\E/;

# One-record look-ahead into the sorted rmap stream. It replaces the former
# seek()-based "un-read", which misbehaved at end of file.
my $pending = next_rmap_record($rmap_fh);

while (my $readline = <$reads_fh>) {
    next unless $readline =~ $header_re;
    chomp $readline;
    my $readname = length($readline) ? substr($readline, 1) : '';

    # Collect every consecutive rmap record that belongs to this read.
    my @hits;
    while (defined $pending && $pending->{name} eq $readname) {
        push @hits, $pending;
        $pending = next_rmap_record($rmap_fh);
    }

    if (!@hits) {
        print {$eland_fh} $readname, "\tNA\tNM\n";
        next;
    }

    print {$eland_fh} $readname, "\tNA\t", scalar(@hits), ":0:0\t";
    for my $hit (@hits) {
        # The stored start is shifted by one in the report.
        print {$eland_fh} "/", $hit->{id}, ":", $hit->{start} + 1,
            ($hit->{forward} ? "F0," : "R0,");
    }
    print {$eland_fh} "\n";
}

# Buffered write errors only surface at close, so check it.
close($eland_fh) or die "Error writing '$elandfilename': $!\n";
close($rmap_fh);
close($reads_fh);

# Return the first byte of $path, or '' when the file is empty.
sub first_byte {
    my ($path) = @_;
    open(my $fh, '<', $path)
        or die "Cannot open reads file '$path': $!\n";
    binmode($fh);
    my $byte  = '';
    my $count = read($fh, $byte, 1);
    defined $count or die "Cannot read from '$path': $!\n";
    close($fh);
    return $byte;
}

# Write the odd-numbered lines of $in_path, sorted by sort(1), to $out_path.
# sort is started without a shell, so unusual file names are safe.
sub sort_read_headers {
    my ($in_path, $out_path) = @_;
    open(my $in, '<', $in_path)
        or die "Cannot open reads file '$in_path': $!\n";
    open(my $sorter, '|-', 'sort', '-o', $out_path)
        or die "Cannot start sort: $!\n";
    my $line_no = 0;
    while (my $line = <$in>) {
        next unless ++$line_no % 2;    # keep lines 1, 3, 5, ...
        chomp $line;
        print {$sorter} $line, "\n"
            or die "Cannot write to sort: $!\n";
    }
    close($in);
    close($sorter)
        or die 'sort of read headers failed: '
            . ($! ? $! : 'exit status ' . ($? >> 8)) . "\n";
    return;
}

# Sort $in_path on its 4th tab-separated column (the read name) into
# $out_path. The tab separator matches the split in next_rmap_record().
sub sort_rmap_by_read {
    my ($in_path, $out_path) = @_;
    system('sort', '-t', "\t", '-k4,4', '-o', $out_path, '--', $in_path) == 0
        or die "sort of '$in_path' failed (status $?)\n";
    return;
}

# Read the next usable rmap record from $fh.
# Returns a hash ref with keys id, start, name and forward, or undef at end
# of file. Lines with fewer than four columns (including blank lines) are
# skipped because they carry no read name to merge on.
sub next_rmap_record {
    my ($fh) = @_;
    while (my $line = <$fh>) {
        chomp $line;
        my ($id, $start, $name, $strand) = (split /\t/, $line)[0, 1, 3, 5];
        next unless defined $name;
        return {
            id      => $id,
            start   => $start,
            name    => $name,
            forward => (defined $strand && $strand eq '+') ? 1 : 0,
        };
    }
    return;
}