Mercurial > repos > yusuf > extend_bed_regions
view extend_bed_regions @ 0:cb1c17dddc14 default tip
initial commit
author | Yusuf Ali <ali@yusuf.email> |
---|---|
date | Wed, 25 Mar 2015 13:32:42 -0600 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env perl use strict; use warnings; # This script adds up to X bases to either end of BED file regions (CAN BE NEGATIVE). The tool will merge regions that overlap after the expansion. @ARGV == 4 or die "Usage: $0 <input regions.bed> <# bases> <5|3|both> <output.bed>\nWhere 5, 3 or both controls which flank is modified\n"; my $extension = $ARGV[1]; my $ends = $ARGV[2]; my %chrs; my @chr_names; open(BED, $ARGV[0]) or die "Cannot open $ARGV[0] for reading: $!\n"; # assuming input in sorted by start per contig while(<BED>){ chomp; my @F = split /\t/, $_; my $strand = @F > 5 ? $F[5] : "+"; # is the strand field present? my $start; my $stop; if($strand eq "+"){ if($ends eq "both" or $ends eq "5"){ $start = $F[1]-$extension; $start = 1 if $start < 1; } if($ends eq "both" or $ends eq "3"){ $stop = $F[2]+$extension; # can't check if okay unless we knew the chr sizes... } } else{ # negative strand if($ends eq "both" or $ends eq "3"){ $start = $F[1]-$extension; $start = 1 if $start < 1; } if($ends eq "both" or $ends eq "5"){ $stop = $F[2]+$extension; # can't check if okay unless we knew the chr sizes... } } if(not exists $chrs{$F[0]}){ $chrs{$F[0]} = [[$start,$stop]]; push @chr_names, $F[0]; next; } my $last_interval = $chrs{$F[0]}->[$#{$chrs{$F[0]}}]; if($start <= $last_interval->[1]){ #overlaps previous if($stop > $last_interval->[1]){ # extends beyond previous $last_interval->[1] = $stop; } #else it doesn't contribute new bases } else{ # standalone push @{$chrs{$F[0]}}, [$start, $stop]; } } close(BED); open(OUTPUT_BED, ">$ARGV[3]") or die "Cannot open $ARGV[3] for writing: $!\n"; for my $chr_name (@chr_names){ for my $interval (@{$chrs{$chr_name}}){ print OUTPUT_BED join("\t",$chr_name,$interval->[0],$interval->[1]), "\n"; } } close(OUTPUT_BED);