Mercurial > repos > portiahollyoak > temp
comparison scripts/mergeTagsWithGap.pl @ 0:28d1a6f8143f draft
planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
author | portiahollyoak |
---|---|
date | Mon, 25 Apr 2016 13:08:56 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:28d1a6f8143f |
---|---|
1 #!/share/bin/perl | |
2 #chr2L 114333 114409 FBgn0003055_P-element harwich,1; + antisense | |
3 #chr2L 114443 114567 FBgn0003055_P-element harwich,3; + antisense | |
4 #chr2L 114636 114712 FBgn0003055_P-element harwich,1; - antisense | |
5 #chr2L 131640 131929 FBgn0003055_P-element harwich,42; + sense | |
6 #chr2L 131948 132274 FBgn0003055_P-element harwich,18; - sense | |
7 #chr2L 132027 132103 FBgn0003055_P-element harwich,1; - antisense | |
8 | |
9 use warnings; | |
10 use strict; | |
11 use List::Util qw(max min); | |
12 | |
13 if(scalar(@ARGV)<2 || grep {/^-h/} @ARGV) | |
14 { | |
15 die " | |
16 usage: mergeOverlapBed4.pl inputFile | |
17 Expects BED input with at least 4 fields. For each {chr,name} pair, | |
18 merges overlapping ranges and prints out sorted BED4 to stdout. | |
19 inputFile can be - or stdin to read from stdin. | |
20 "; | |
21 } | |
22 | |
23 my $input=shift @ARGV; | |
24 my $maxgap=shift @ARGV; | |
25 grep {s/^stdin$/-/i} $input; | |
26 | |
27 my %item2coords; | |
28 open IN,$input; | |
29 while (<IN>) | |
30 { | |
31 chomp; | |
32 my ($chrom,$start,$end,$transposonName,$count,$strand,$transposonStrand)=split/\t/; | |
33 push @{$item2coords{"$chrom;$transposonName;$transposonStrand"}},[$start,$end,$count,$strand]; | |
34 } | |
35 close IN; | |
36 | |
37 my @results; | |
38 foreach my $item (keys %item2coords) | |
39 { | |
40 my @sortedCoords=sort{ $a->[0]<=>$b->[0] } @{$item2coords{$item}}; | |
41 my ($chrom,$tName,$tStrand)=split(/;/,$item); | |
42 my ($mergeStart,$mergeEnd,$mergeCounts,$mergeStrand)=@{shift @sortedCoords}; | |
43 my %sampleCounts=(); | |
44 my ($breakStart,$breakEnd)=0; | |
45 foreach my $sa (split/;/,$mergeCounts) | |
46 { | |
47 my ($s,$c)=split/,/,$sa; | |
48 $sampleCounts{$s}{$mergeStrand}=$c; | |
49 } | |
50 foreach my $rangeRef (@sortedCoords) | |
51 { | |
52 my ($rangeStart,$rangeEnd,$rangeCounts,$rangeStrand)=@{$rangeRef}; | |
53 if($mergeStrand=~/\Q$rangeStrand\E$/) | |
54 { | |
55 if($rangeStart>=$mergeEnd+$maxgap) | |
56 { | |
57 $mergeCounts=""; | |
58 foreach my $s (keys %sampleCounts) | |
59 { | |
60 $mergeCounts.=$s; | |
61 $mergeCounts.=",".$_.$sampleCounts{$s}{$_} foreach keys %{$sampleCounts{$s}}; | |
62 $mergeCounts.=";"; | |
63 } | |
64 if($mergeStrand eq "+") | |
65 { | |
66 $breakStart=$mergeEnd; | |
67 $breakEnd=$mergeEnd+$maxgap; | |
68 } | |
69 if($mergeStrand eq "-") | |
70 { | |
71 $breakStart=$mergeStart-$maxgap; | |
72 $breakEnd=$mergeStart; | |
73 } | |
74 push @results,[$chrom,$mergeStart,$mergeEnd,$tName,$mergeCounts,$mergeStrand,$tStrand,"$chrom:$breakStart.$breakEnd"]; | |
75 ($mergeStart,$mergeEnd,$mergeStrand)=($rangeStart,$rangeEnd,$rangeStrand); | |
76 %sampleCounts=(); | |
77 foreach my $sa (split/;/,$rangeCounts) | |
78 { | |
79 my ($s,$c)=split/,/,$sa; | |
80 $sampleCounts{$s}{$rangeStrand}=$c; | |
81 } | |
82 } | |
83 else | |
84 { | |
85 $mergeEnd=max($rangeEnd,$mergeEnd); | |
86 foreach my $sa (split/;/,$rangeCounts) | |
87 { | |
88 my ($s,$c)=split/,/,$sa; | |
89 $sampleCounts{$s}{$rangeStrand}+=$c; | |
90 } | |
91 } | |
92 } | |
93 elsif($rangeStrand eq "+") | |
94 { | |
95 $mergeCounts=""; | |
96 foreach my $s (keys %sampleCounts) | |
97 { | |
98 $mergeCounts.=$s; | |
99 $mergeCounts.=",".$_.$sampleCounts{$s}{$_} foreach keys %{$sampleCounts{$s}}; | |
100 $mergeCounts.=";"; | |
101 } | |
102 if($mergeStrand eq "+") | |
103 { | |
104 $breakStart=$mergeEnd; | |
105 $breakEnd=$mergeEnd+$maxgap; | |
106 } | |
107 if($mergeStrand eq "-") | |
108 { | |
109 $breakStart=$mergeStart-$maxgap; | |
110 $breakEnd=$mergeStart; | |
111 } | |
112 push @results,[$chrom,$mergeStart,$mergeEnd,$tName,$mergeCounts,$mergeStrand,$tStrand,"$chrom:$breakStart.$breakEnd"]; | |
113 ($mergeStart,$mergeEnd,$mergeStrand)=($rangeStart,$rangeEnd,$rangeStrand); | |
114 %sampleCounts=(); | |
115 foreach my $sa (split/;/,$rangeCounts) | |
116 { | |
117 my ($s,$c)=split/,/,$sa; | |
118 $sampleCounts{$s}{$rangeStrand}=$c; | |
119 } | |
120 } | |
121 else | |
122 { | |
123 if($rangeStart>=$mergeEnd+$maxgap*2) | |
124 { | |
125 $mergeCounts=""; | |
126 foreach my $s (keys %sampleCounts) | |
127 { | |
128 $mergeCounts.=$s; | |
129 $mergeCounts.=",".$_.$sampleCounts{$s}{$_} foreach keys %{$sampleCounts{$s}}; | |
130 $mergeCounts.=";"; | |
131 } | |
132 if($mergeStrand eq "+") | |
133 { | |
134 $breakStart=$mergeEnd; | |
135 $breakEnd=$mergeEnd+$maxgap; | |
136 } | |
137 if($mergeStrand eq "-") | |
138 { | |
139 $breakStart=$mergeStart-$maxgap; | |
140 $breakEnd=$mergeStart; | |
141 } | |
142 push @results,[$chrom,$mergeStart,$mergeEnd,$tName,$mergeCounts,$mergeStrand,$tStrand,"$chrom:$breakStart.$breakEnd"]; | |
143 ($mergeStart,$mergeEnd,$mergeStrand)=($rangeStart,$rangeEnd,$rangeStrand); | |
144 %sampleCounts=(); | |
145 foreach my $sa (split/;/,$rangeCounts) | |
146 { | |
147 my ($s,$c)=split/,/,$sa; | |
148 $sampleCounts{$s}{$rangeStrand}=$c; | |
149 } | |
150 } | |
151 else | |
152 { | |
153 $breakStart=$mergeEnd; | |
154 $mergeEnd=max($rangeEnd,$mergeEnd); | |
155 $breakEnd=$rangeStart; | |
156 foreach my $sa (split/;/,$rangeCounts) | |
157 { | |
158 my ($s,$c)=split/,/,$sa; | |
159 $sampleCounts{$s}{$rangeStrand}+=$c; | |
160 } | |
161 $mergeStrand.=$rangeStrand; | |
162 } | |
163 } | |
164 } | |
165 $mergeCounts=""; | |
166 foreach my $s (keys %sampleCounts) | |
167 { | |
168 $mergeCounts.=$s; | |
169 $mergeCounts.=",".$_.$sampleCounts{$s}{$_} foreach keys %{$sampleCounts{$s}}; | |
170 $mergeCounts.=";"; | |
171 } | |
172 if($mergeStrand eq "+") | |
173 { | |
174 $breakStart=$mergeEnd; | |
175 $breakEnd=$mergeEnd+$maxgap; | |
176 } | |
177 if($mergeStrand eq "-") | |
178 { | |
179 $breakStart=$mergeStart-$maxgap; | |
180 $breakEnd=$mergeStart; | |
181 } | |
182 push @results,[$chrom,$mergeStart,$mergeEnd,$tName,$mergeCounts,$mergeStrand,$tStrand,"$chrom:$breakStart.$breakEnd"] if $mergeEnd; | |
183 } | |
184 | |
185 sub bed4Cmp | |
186 { | |
187 # For sorting by chrom, chromStart, and names -- reverse order for names | |
188 return $a->[0] cmp $b->[0] || | |
189 $a->[1] <=> $b->[1] || | |
190 $b->[3] cmp $a->[3]; | |
191 } | |
192 | |
193 foreach my $r (sort bed4Cmp @results) | |
194 { | |
195 print join("\t",@{$r}),"\n"; | |
196 } |