changeset 0:cb1c17dddc14 default tip

initial commit
author Yusuf Ali <ali@yusuf.email>
date Wed, 25 Mar 2015 13:32:42 -0600
parents
children
files ExtendBedRegions.xml extend_bed_regions
diffstat 2 files changed, 92 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ExtendBedRegions.xml	Wed Mar 25 13:32:42 2015 -0600
@@ -0,0 +1,26 @@
+<?xml version="1.0"?>
+
+<tool id="extend_bed_regions_1" name="Expand or contract regions">
+  <description>in a BED file by a fixed number of bases</description>
+  <version_string>echo 1.0.0</version_string>
+  <command interpreter="perl">extend_bed_regions $in_bed $extent $strand $out_bed</command>
+  <inputs>
+    <param format="bed" name="in_bed" type="data" label="BED file whose regions should be modified"/>
+    <param name="extent" type="integer" value="50" label="Number of bases to extend each BED region (negative values cause contraction)"/>
+    <param name="strand" type="select" display="radio" label="Which end to extend?" help="If the BED file contains strand info, extension will be strand specific. Otherwise, the positive strand is assumed">
+      <option value="5">5'</option>
+      <option value="3">3'</option>
+      <option value="both" selected="true">Both</option>
+    </param>
+  </inputs>
+  <outputs>
+    <data name="out_bed" format="bed" type="data" label="BED file with extended/contracted regions"/>
+  </outputs>
+
+  <tests/>
+
+  <help>
+This tool expands each regions defined in a BED file by the specified amount, at the specified end(s). If expanded regions overlap, they are merged.
+ </help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/extend_bed_regions	Wed Mar 25 13:32:42 2015 -0600
@@ -0,0 +1,66 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+
+# This script adds up to X bases to either end of BED file regions (CAN BE NEGATIVE). The tool will merge regions that overlap after the expansion.
+@ARGV == 4 or die "Usage: $0 <input regions.bed> <# bases> <5|3|both> <output.bed>\nWhere 5, 3 or both controls which flank is modified\n";
+
+my $extension = $ARGV[1];
+my $ends = $ARGV[2];
+
+my %chrs;
+my @chr_names;
+open(BED, $ARGV[0])
+  or die "Cannot open $ARGV[0] for reading: $!\n";
+# assuming input in sorted by start per contig
+while(<BED>){
+  chomp;
+  my @F = split /\t/, $_;
+  my $strand = @F > 5 ? $F[5] : "+"; # is the strand field present?
+  my $start;
+  my $stop;
+  if($strand eq "+"){
+    if($ends eq "both" or $ends eq "5"){
+      $start = $F[1]-$extension;
+      $start = 1 if $start < 1;
+    }
+    if($ends eq "both" or $ends eq "3"){
+      $stop = $F[2]+$extension; # can't check if okay unless we knew the chr sizes...
+    }
+  }
+  else{
+    # negative strand
+    if($ends eq "both" or $ends eq "3"){
+      $start = $F[1]-$extension;
+      $start = 1 if $start < 1;
+    }
+    if($ends eq "both" or $ends eq "5"){
+      $stop = $F[2]+$extension; # can't check if okay unless we knew the chr sizes...
+    }
+  }
+  if(not exists $chrs{$F[0]}){
+    $chrs{$F[0]} = [[$start,$stop]];
+    push @chr_names, $F[0];
+    next;
+  }
+  my $last_interval = $chrs{$F[0]}->[$#{$chrs{$F[0]}}];
+  if($start <= $last_interval->[1]){ #overlaps previous
+    if($stop > $last_interval->[1]){ # extends beyond previous
+      $last_interval->[1] = $stop;
+    } #else it doesn't contribute new bases
+  }
+  else{ # standalone
+    push @{$chrs{$F[0]}}, [$start, $stop];
+  }
+}
+close(BED);
+
+open(OUTPUT_BED, ">$ARGV[3]")
+  or die "Cannot open $ARGV[3] for writing: $!\n";
+for my $chr_name (@chr_names){
+  for my $interval (@{$chrs{$chr_name}}){
+    print OUTPUT_BED join("\t",$chr_name,$interval->[0],$interval->[1]), "\n";
+  }
+}
+close(OUTPUT_BED);